# Replication code for Taylor C. Boas, Dino P. Christenson, and David M. Glick, "Recruiting Large Online Samples in the United States and India: Facebook, Mechanical Turk and Qualtrics," Political Science Research and Methods.

# Analysis conducted in R 3.4.3 on MacOS 10.13.2

# NOTE: This file produces figures and tables related to demographics: Main Text Figures 1-2, Appendix Figures 2-4, and Appendix Tables 3, 4, 8, 9, 12, and 13. It also produces results conveyed textually but not in tables or figures. Files should be run in the following order; please see readme.txt for details.
# 	1. clean_us_survey.R
# 	2. clean_india_survey.R
# 	3. merge_external_data_us.R
# 	4. merge_external_data_india.R
# 	5. analyze_demographics.R
# 	6. analyze_spaces.R
# 	7. analyze_politics.R
# 	8. analyze_cooperativeness.R
# 	9. analyze_experiments.R

# Set working directory as appropriate
# setwd('~/Dropbox/sample recruitment shared/replication/')

# Clean desktop and load packages. Please make sure all necessary packages are installed.
rm(list=ls(all=T))
library(car)
library(Hmisc)
library(foreign)
library(survey)
library(lsr)
library(XML)
library(openxlsx)

# Load survey data and external data. For Indian surveys other than WVS, we only have the marginals; using those to do t-tests. Relevant documents are in the main folder.

load('india_completions_augmented.RData')
load('us_completions_augmented.RData')
load('cces_augmented.RData')
load('gss.RData')
load('anes.RData')
load('wvs.RData')
load('census_pop_districts.RData')
pincodes<-read.csv('all_india_PO_list_without_APS_offices_ver2_lat_long.csv',colClasses='character')

# US sample for unweighted analysis: excluding targeted respondents in Facebook/MTurk samples 

us1$nontarg<-((us1$sample=='Qualtrics')|(us1$sample=='Facebook'&us1$targ==0)|(us1$sample=='MTurk'&us1$StartDate <= as.POSIXct('2015-12-31 23:59:59 EST')))

###################################
# Appendix Table 3: US Demographics
###################################

# Generate stacked datasets and weighted design objects for each comparison

anes$edu<-ifelse(unclass(anes$dem_edugroup) %in% 1:5, NA, unclass(anes$dem_edugroup) - 6)
us.fb.anes<-rbind(us1[us1$sample=='Facebook',c('sample','weight','nontarg','Q23')],data.frame(sample=' ANES', weight=1, nontarg=T, Q23=anes$edu))
us.fb.anes.design<-svydesign(ids=~1,weights= us.fb.anes$weight,data= us.fb.anes)
us.mt.anes<-rbind(us1[us1$sample=='MTurk',c('sample','weight','nontarg','Q23')],data.frame(sample=' ANES', weight=1, nontarg=T, Q23=anes$edu))
us.mt.anes.design<-svydesign(ids=~1,weights= us.mt.anes$weight,data= us.mt.anes)
us.qt.anes<-rbind(us1[us1$sample=='Qualtrics',c('sample','weight','nontarg','Q23')],data.frame(sample=' ANES', weight=1, nontarg=T, Q23=anes$edu))
us.qt.anes.design<-svydesign(ids=~1,weights= us.qt.anes$weight,data= us.qt.anes)

gss$income_rev<-recode(gss$income06, '1:8=1;9:12=2;13:15=3;16:17=4')
gss$income_rev[which(gss$income06 >= 18)]<-gss$income06[which(gss$income06 >= 18)] - 13
us1$income_rev<-ifelse(us1$Q31==13, NA, us1$Q31)
gss$male<-gss$sex==1
us1$male<-us1$Q22==1
gss$married<-gss$marital == 1
us1$married<-us1$Q30==1
gss$religious<-gss$relig != 4
us1$religious<-us1$Q26!=5
gss$hisp<-gss$hispanic != 1
us1$hisp<-us1$Q24==1
gss$black<-gss$racecen1 %in% 2 | gss$racecen2 %in% 2 | gss$racecen3 %in% 2
us1$black<-us1$Q25_2==1
gss$white<-gss$racecen1 %in% 1 | gss$racecen2 %in% 1 | gss$racecen3 %in% 1
us1$white<-us1$Q25_1==1
us1$age<-us1$Q3

us.fb.gss<-rbind(us1[us1$sample=='Facebook',c('sample','weight','nontarg','age','income_rev','male','married','religious','hisp','black','white')],data.frame(sample=' GSS', weight=1, nontarg=T, gss[,c('age','income_rev','male','married','religious','hisp','black','white')]))
us.fb.gss.design<-svydesign(ids=~1,weights= us.fb.gss$weight,data= us.fb.gss)
us.mt.gss<-rbind(us1[us1$sample=='MTurk',c('sample','weight','nontarg','age','income_rev','male','married','religious','hisp','black','white')],data.frame(sample=' GSS', weight=1, nontarg=T, gss[,c('age','income_rev','male','married','religious','hisp','black','white')]))
us.mt.gss.design<-svydesign(ids=~1,weights= us.mt.gss$weight,data= us.mt.gss)
us.qt.gss<-rbind(us1[us1$sample=='Qualtrics',c('sample','weight','nontarg','age','income_rev','male','married','religious','hisp','black','white')],data.frame(sample=' GSS', weight=1, nontarg=T, gss[,c('age','income_rev','male','married','religious','hisp','black','white')]))
us.qt.gss.design<-svydesign(ids=~1,weights= us.qt.gss$weight,data= us.qt.gss)

us.fb.cces<-rbind(us1[us1$sample=='Facebook',c('sample','weight','nontarg','density','pop','rucc')], data.frame(sample=' CCES', weight=1, nontarg=T, cces[,c('density','pop','rucc')]))
us.fb.cces.design<-svydesign(ids=~1,weights= us.fb.cces$weight,data= us.fb.cces)
us.mt.cces<-rbind(us1[us1$sample=='MTurk',c('sample','weight','nontarg','density','pop','rucc')], data.frame(sample=' CCES', weight=1, nontarg=T, cces[,c('density','pop','rucc')]))
us.mt.cces.design<-svydesign(ids=~1,weights= us.mt.cces$weight,data= us.mt.cces)
us.qt.cces<-rbind(us1[us1$sample=='Qualtrics',c('sample','weight','nontarg','density','pop','rucc')], data.frame(sample=' CCES', weight=1, nontarg=T, cces[,c('density','pop','rucc')]))
us.qt.cces.design<-svydesign(ids=~1,weights= us.qt.cces$weight,data= us.qt.cces)

# Age
reg.us.age.fb<-with(us.fb.gss[us.fb.gss$nontarg,],lm(age~sample))
reg.us.age.fb.wtd<-svyglm(age~sample, design= us.fb.gss.design)
reg.us.age.mt<-with(us.mt.gss[us.mt.gss$nontarg,],lm(age~sample))
reg.us.age.mt.wtd<-svyglm(age~sample, design= us.mt.gss.design)
reg.us.age.qt<-with(us.qt.gss[us.qt.gss$nontarg,],lm(age~sample))
reg.us.age.qt.wtd<-svyglm(age~sample, design= us.qt.gss.design)

# Education
reg.us.edu.fb<-with(us.fb.anes[us.fb.anes$nontarg,],lm(Q23~sample))
reg.us.edu.fb.wtd<-svyglm(Q23~sample, design= us.fb.anes.design)
reg.us.edu.mt<-with(us.mt.anes[us.mt.anes$nontarg,],lm(Q23~sample))
reg.us.edu.mt.wtd<-svyglm(Q23~sample, design= us.mt.anes.design)
reg.us.edu.qt<-with(us.qt.anes[us.qt.anes$nontarg,],lm(Q23~sample))
reg.us.edu.qt.wtd<-svyglm(Q23~sample, design= us.qt.anes.design)

# Income
reg.us.income_rev.fb<-with(us.fb.gss[us.fb.gss$nontarg,],lm(income_rev~sample))
reg.us.income_rev.fb.wtd<-svyglm(income_rev~sample, design= us.fb.gss.design)
reg.us.income_rev.mt<-with(us.mt.gss[us.mt.gss$nontarg,],lm(income_rev~sample))
reg.us.income_rev.mt.wtd<-svyglm(income_rev~sample, design= us.mt.gss.design)
reg.us.income_rev.qt<-with(us.qt.gss[us.qt.gss$nontarg,],lm(income_rev~sample))
reg.us.income_rev.qt.wtd<-svyglm(income_rev~sample, design= us.qt.gss.design)

# Sex
reg.us.male.fb<-with(us.fb.gss[us.fb.gss$nontarg,],lm(male~sample))
reg.us.male.fb.wtd<-svyglm(male~sample, design= us.fb.gss.design)
reg.us.male.mt<-with(us.mt.gss[us.mt.gss$nontarg,],lm(male~sample))
reg.us.male.mt.wtd<-svyglm(male~sample, design= us.mt.gss.design)
reg.us.male.qt<-with(us.qt.gss[us.qt.gss$nontarg,],lm(male~sample))
reg.us.male.qt.wtd<-svyglm(male~sample, design= us.qt.gss.design)

# Married
reg.us.married.fb<-with(us.fb.gss[us.fb.gss$nontarg,],lm(married~sample))
reg.us.married.fb.wtd<-svyglm(married~sample, design= us.fb.gss.design)
reg.us.married.mt<-with(us.mt.gss[us.mt.gss$nontarg,],lm(married~sample))
reg.us.married.mt.wtd<-svyglm(married~sample, design= us.mt.gss.design)
reg.us.married.qt<-with(us.qt.gss[us.qt.gss$nontarg,],lm(married~sample))
reg.us.married.qt.wtd<-svyglm(married~sample, design= us.qt.gss.design)

# Religious
reg.us.religious.fb<-with(us.fb.gss[us.fb.gss$nontarg,],lm(religious~sample))
reg.us.religious.fb.wtd<-svyglm(religious~sample, design= us.fb.gss.design)
reg.us.religious.mt<-with(us.mt.gss[us.mt.gss$nontarg,],lm(religious~sample))
reg.us.religious.mt.wtd<-svyglm(religious~sample, design= us.mt.gss.design)
reg.us.religious.qt<-with(us.qt.gss[us.qt.gss$nontarg,],lm(religious~sample))
reg.us.religious.qt.wtd<-svyglm(religious~sample, design= us.qt.gss.design)

# Hispanic
reg.us.hisp.fb<-with(us.fb.gss[us.fb.gss$nontarg,],lm(hisp~sample))
reg.us.hisp.fb.wtd<-svyglm(hisp~sample, design= us.fb.gss.design)
reg.us.hisp.mt<-with(us.mt.gss[us.mt.gss$nontarg,],lm(hisp~sample))
reg.us.hisp.mt.wtd<-svyglm(hisp~sample, design= us.mt.gss.design)
reg.us.hisp.qt<-with(us.qt.gss[us.qt.gss$nontarg,],lm(hisp~sample))
reg.us.hisp.qt.wtd<-svyglm(hisp~sample, design= us.qt.gss.design)

# Black
reg.us.black.fb<-with(us.fb.gss[us.fb.gss$nontarg,],lm(black~sample))
reg.us.black.fb.wtd<-svyglm(black~sample, design= us.fb.gss.design)
reg.us.black.mt<-with(us.mt.gss[us.mt.gss$nontarg,],lm(black~sample))
reg.us.black.mt.wtd<-svyglm(black~sample, design= us.mt.gss.design)
reg.us.black.qt<-with(us.qt.gss[us.qt.gss$nontarg,],lm(black~sample))
reg.us.black.qt.wtd<-svyglm(black~sample, design= us.qt.gss.design)

# White
reg.us.white.fb<-with(us.fb.gss[us.fb.gss$nontarg,],lm(white~sample))
reg.us.white.fb.wtd<-svyglm(white~sample, design= us.fb.gss.design)
reg.us.white.mt<-with(us.mt.gss[us.mt.gss$nontarg,],lm(white~sample))
reg.us.white.mt.wtd<-svyglm(white~sample, design= us.mt.gss.design)
reg.us.white.qt<-with(us.qt.gss[us.qt.gss$nontarg,],lm(white~sample))
reg.us.white.qt.wtd<-svyglm(white~sample, design= us.qt.gss.design)

# Population density
reg.us.density.fb<-with(us.fb.cces[us.fb.cces$nontarg,],lm(I(log(density))~sample))
reg.us.density.fb.wtd<-svyglm(I(log(density))~sample, design= us.fb.cces.design)
reg.us.density.mt<-with(us.mt.cces[us.mt.cces$nontarg,],lm(I(log(density))~sample))
reg.us.density.mt.wtd<-svyglm(I(log(density))~sample, design= us.mt.cces.design)
reg.us.density.qt<-with(us.qt.cces[us.qt.cces$nontarg,],lm(I(log(density))~sample))
reg.us.density.qt.wtd<-svyglm(I(log(density))~sample, design= us.qt.cces.design)

# Population
reg.us.pop.fb<-with(us.fb.cces[us.fb.cces$nontarg,],lm(I(log(pop))~sample))
reg.us.pop.fb.wtd<-svyglm(I(log(pop))~sample, design= us.fb.cces.design)
reg.us.pop.mt<-with(us.mt.cces[us.mt.cces$nontarg,],lm(I(log(pop))~sample))
reg.us.pop.mt.wtd<-svyglm(I(log(pop))~sample, design= us.mt.cces.design)
reg.us.pop.qt<-with(us.qt.cces[us.qt.cces$nontarg,],lm(I(log(pop))~sample))
reg.us.pop.qt.wtd<-svyglm(I(log(pop))~sample, design= us.qt.cces.design)

# Rural-urban continuum code
reg.us.rucc.fb<-with(us.fb.cces[us.fb.cces$nontarg,],lm(rucc~sample))
reg.us.rucc.fb.wtd<-svyglm(rucc~sample, design= us.fb.cces.design)
reg.us.rucc.mt<-with(us.mt.cces[us.mt.cces$nontarg,],lm(rucc~sample))
reg.us.rucc.mt.wtd<-svyglm(rucc~sample, design= us.mt.cces.design)
reg.us.rucc.qt<-with(us.qt.cces[us.qt.cces$nontarg,],lm(rucc~sample))
reg.us.rucc.qt.wtd<-svyglm(rucc~sample, design= us.qt.cces.design)

# Gather coefficients for table

pval<-function(reg) ifelse(nrow(coef(summary(reg)))==1,NA,coef(summary(reg))[2,4])

stars<-function(pval){
	stars<-1*(pval < 0.1) +1*(pval < 0.05) + 1*(pval < 0.01) + 1*(pval < 0.001)
	stars[is.na(stars)]<-0
	stars<-as.character(factor(stars,levels=0:4,labels=c('','$\\dagger$','*','**','***')))
	stars
}

est.us.prob<-round(sapply(list(reg.us.age.fb, reg.us.edu.fb, reg.us.income_rev.fb, reg.us.male.fb, reg.us.married.fb, reg.us.religious.fb, reg.us.hisp.fb, reg.us.black.fb, reg.us.white.fb, reg.us.density.fb, reg.us.pop.fb, reg.us.rucc.fb), function(x) coef(x)[1]),2)
est.us.fb<-round(sapply(list(reg.us.age.fb, reg.us.edu.fb, reg.us.income_rev.fb, reg.us.male.fb, reg.us.married.fb, reg.us.religious.fb, reg.us.hisp.fb, reg.us.black.fb, reg.us.white.fb, reg.us.density.fb, reg.us.pop.fb, reg.us.rucc.fb), function(x) sum(coef(x))),2)
pval.us.fb<-sapply(list(reg.us.age.fb, reg.us.edu.fb, reg.us.income_rev.fb, reg.us.male.fb, reg.us.married.fb, reg.us.religious.fb, reg.us.hisp.fb, reg.us.black.fb, reg.us.white.fb, reg.us.density.fb, reg.us.pop.fb, reg.us.rucc.fb),pval)
est.us.fb.wtd<-round(sapply(list(reg.us.age.fb.wtd, reg.us.edu.fb.wtd, reg.us.income_rev.fb.wtd, reg.us.male.fb.wtd, reg.us.married.fb.wtd, reg.us.religious.fb.wtd, reg.us.hisp.fb.wtd, reg.us.black.fb.wtd, reg.us.white.fb.wtd, reg.us.density.fb.wtd, reg.us.pop.fb.wtd, reg.us.rucc.fb.wtd), function(x) sum(coef(x))),2)
pval.us.fb.wtd<-sapply(list(reg.us.age.fb.wtd, reg.us.edu.fb.wtd, reg.us.income_rev.fb.wtd, reg.us.male.fb.wtd, reg.us.married.fb.wtd, reg.us.religious.fb.wtd, reg.us.hisp.fb.wtd, reg.us.black.fb.wtd, reg.us.white.fb.wtd, reg.us.density.fb.wtd, reg.us.pop.fb.wtd, reg.us.rucc.fb.wtd),pval)
est.us.mt<-round(sapply(list(reg.us.age.mt, reg.us.edu.mt, reg.us.income_rev.mt, reg.us.male.mt, reg.us.married.mt, reg.us.religious.mt, reg.us.hisp.mt, reg.us.black.mt, reg.us.white.mt, reg.us.density.mt, reg.us.pop.mt, reg.us.rucc.mt), function(x) sum(coef(x))),2)
pval.us.mt<-sapply(list(reg.us.age.mt, reg.us.edu.mt, reg.us.income_rev.mt, reg.us.male.mt, reg.us.married.mt, reg.us.religious.mt, reg.us.hisp.mt, reg.us.black.mt, reg.us.white.mt, reg.us.density.mt, reg.us.pop.mt, reg.us.rucc.mt),pval)
est.us.mt.wtd<-round(sapply(list(reg.us.age.mt.wtd, reg.us.edu.mt.wtd, reg.us.income_rev.mt.wtd, reg.us.male.mt.wtd, reg.us.married.mt.wtd, reg.us.religious.mt.wtd, reg.us.hisp.mt.wtd, reg.us.black.mt.wtd, reg.us.white.mt.wtd, reg.us.density.mt.wtd, reg.us.pop.mt.wtd, reg.us.rucc.mt.wtd), function(x) sum(coef(x))),2)
pval.us.mt.wtd<-sapply(list(reg.us.age.mt.wtd, reg.us.edu.mt.wtd, reg.us.income_rev.mt.wtd, reg.us.male.mt.wtd, reg.us.married.mt.wtd, reg.us.religious.mt.wtd, reg.us.hisp.mt.wtd, reg.us.black.mt.wtd, reg.us.white.mt.wtd, reg.us.density.mt.wtd, reg.us.pop.mt.wtd, reg.us.rucc.mt.wtd),pval)
est.us.qt<-round(sapply(list(reg.us.age.qt, reg.us.edu.qt, reg.us.income_rev.qt, reg.us.male.qt, reg.us.married.qt, reg.us.religious.qt, reg.us.hisp.qt, reg.us.black.qt, reg.us.white.qt, reg.us.density.qt, reg.us.pop.qt, reg.us.rucc.qt), function(x) sum(coef(x))),2)
pval.us.qt<-sapply(list(reg.us.age.qt, reg.us.edu.qt, reg.us.income_rev.qt, reg.us.male.qt, reg.us.married.qt, reg.us.religious.qt, reg.us.hisp.qt, reg.us.black.qt, reg.us.white.qt, reg.us.density.qt, reg.us.pop.qt, reg.us.rucc.qt),pval)
est.us.qt.wtd<-round(sapply(list(reg.us.age.qt.wtd, reg.us.edu.qt.wtd, reg.us.income_rev.qt.wtd, reg.us.male.qt.wtd, reg.us.married.qt.wtd, reg.us.religious.qt.wtd, reg.us.hisp.qt.wtd, reg.us.black.qt.wtd, reg.us.white.qt.wtd, reg.us.density.qt.wtd, reg.us.pop.qt.wtd, reg.us.rucc.qt.wtd), function(x) sum(coef(x))),2)
pval.us.qt.wtd<-sapply(list(reg.us.age.qt.wtd, reg.us.edu.qt.wtd, reg.us.income_rev.qt.wtd, reg.us.male.qt.wtd, reg.us.married.qt.wtd, reg.us.religious.qt.wtd, reg.us.hisp.qt.wtd, reg.us.black.qt.wtd, reg.us.white.qt.wtd, reg.us.density.qt.wtd, reg.us.pop.qt.wtd, reg.us.rucc.qt.wtd),pval)

pval.adj.us<-p.adjust(c(pval.us.fb, pval.us.fb.wtd, pval.us.mt, pval.us.mt.wtd, pval.us.qt, pval.us.qt.wtd),method='BH')
stars.us<-data.frame(matrix(stars(pval.adj.us),nrow=length(est.us.prob)))
names(stars.us)<-c('fb','fb.wtd','mt','mt.wtd','qt','qt.wtd')

us.demog.table<-data.frame(prob=as.character(est.us.prob),fb=paste(est.us.fb,stars.us$fb,sep=''), fb.wtd=paste(est.us.fb.wtd,stars.us$fb.wtd,sep=''), mt=paste(est.us.mt,stars.us$mt,sep=''), mt.wtd=paste(est.us.mt.wtd,stars.us$mt.wtd,sep=''), qt=paste(est.us.qt,stars.us$qt,sep=''), qt.wtd=paste(est.us.qt.wtd,stars.us$qt.wtd,sep=''),stringsAsFactors=F)
rownames(us.demog.table)<-c('Age','Education (1--5)','Income (1--12)','Male','Married','Religious','Hispanic','Black','White','Log Pop. Density','Log Population','RUCC')
colnames(us.demog.table)<-c('Probability\nSample','Facebook','Facebook\nWeighted','MTurk','MTurk\nWeighted','Qualtrics','Qualtrics\nWeighted')

# Bold results that are consistent with hypotheses; underline results that go against.
us.hypo.table<-matrix(
	c(NA,F,NA,T,NA,NA,NA,
	NA,T,NA,T,NA,NA,NA,
	NA,T,NA,T,NA,NA,NA,
	NA,T,NA,F,NA,NA,NA,
	NA,F,NA,T,NA,NA,NA,
	NA,NA,NA,T,NA,NA,NA,
	NA,NA,NA,T,NA,NA,NA,
	NA,NA,NA,T,NA,NA,NA,
	NA,NA,NA,F,NA,NA,NA,
	NA,NA,NA,NA,NA,NA,NA,
	NA,NA,NA,F,NA,NA,NA,
	NA,NA,NA,T,NA,NA,NA), byrow=T,nrow=length(est.us.prob))

for (i in 1:nrow(us.demog.table)){
	for (j in 1:ncol(us.demog.table)){
		if (is.na(us.hypo.table[i,j])) next
		us.demog.table[i,j]<-ifelse(us.hypo.table[i,j], paste0('\\textbf{', us.demog.table[i,j],'}'), paste0('\\underline{', us.demog.table[i,j],'}'))
	}
}

us.demog.table.note<-"NOTE: Entries are mean or weighted mean values for each sample. Probability sample is the 2012 American National Election Study (education; non-oversampled face-to-face interviews only), the 2014 Cooperative Congressional Election Survey (population density), or the 2014 General Social Survey cross-section (all other variables). Population and population density (in residents per square kilometer) are those of the county associated with the respondent's ZIP code. RUCC is Rural-Urban Continuum Code (higher numbers are more rural). Weights are post-stratification, based on region (4 categories), age (quartiles), and sex in the 2010 census. Stars report significance levels from two-tailed difference-in-means t-tests comparing the probability and convenience sample quantities, with adjustment for multiple comparisons using the Benjamini-Hochberg method (all hypotheses in the table are a single family). Results in bold type support pre-registered hypotheses, underlined results contradict pre-registered hypotheses, and those in regular font were not pre-registered. $\\dagger$ p$<$0.1, *p$<$0.05, **p$<$0.01, ***p$<$0.001."

us.demog.table.latex<-latex(us.demog.table,file='us_demog_table.tex', rowlabel = '', collabel.just=rep('c',ncol(us.demog.table)), col.just=rep('c',ncol(us.demog.table)), caption = 'U.S. Demographics: Convenience versus Probability Samples', extracolsize='normalsize', insert.bottom= us.demog.table.note, booktabs = F, ctable = T, where = "htp")

##########################################################
# Main Text Figure 1 and Appendix Table 8: US Demographics
##########################################################

# Weighted design objects, convenience samples only
us.fb.design<-svydesign(ids=~1,weights= us1$weight[us1$sample=='Facebook'],data= us1[us1$sample=='Facebook',])
us.mt.design<-svydesign(ids=~1,weights= us1$weight[us1$sample=='MTurk'],data= us1[us1$sample=='MTurk',])
us.qt.design<-svydesign(ids=~1,weights= us1$weight[us1$sample=='Qualtrics'],data= us1[us1$sample=='Qualtrics',])

# Age
mean.us.age.gss<-with(us.fb.gss[us.fb.gss$nontarg & us.fb.gss$sample==' GSS',],lm(I((age-18)/(100-18))~1))
mean.us.age.fb<-with(us.fb.gss[us.fb.gss$nontarg & us.fb.gss$sample=='Facebook',],lm(I((age-18)/(100-18))~1))
mean.us.age.mt<-with(us.mt.gss[us.mt.gss$nontarg & us.mt.gss$sample=='MTurk',],lm(I((age-18)/(100-18))~1))
mean.us.age.qt<-with(us.qt.gss[us.qt.gss$nontarg & us.qt.gss$sample=='Qualtrics',],lm(I((age-18)/(100-18))~1))

mean.us.age.fb.wtd<-svyglm(I((age-18)/(100-18))~1, design= us.fb.design)
mean.us.age.mt.wtd<-svyglm(I((age-18)/(100-18))~1, design= us.mt.design)
mean.us.age.qt.wtd<-svyglm(I((age-18)/(100-18))~1, design= us.qt.design)

sd.us.age.gss<-with(us.fb.gss[us.fb.gss$nontarg & us.fb.gss$sample==' GSS',],sd(I((age-18)/(100-18)),na.rm=T))

# Education
mean.us.edu.anes<-with(us.fb.anes[us.fb.anes$nontarg & us.fb.anes$sample==' ANES',],lm(I((Q23-1)/4)~1))
mean.us.edu.fb<-with(us.fb.anes[us.fb.anes$nontarg & us.fb.anes$sample=='Facebook',],lm(I((Q23-1)/4)~1))
mean.us.edu.mt<-with(us.mt.anes[us.mt.anes$nontarg & us.mt.anes$sample=='MTurk',],lm(I((Q23-1)/4)~1))
mean.us.edu.qt<-with(us.qt.anes[us.qt.anes$nontarg & us.qt.anes$sample=='Qualtrics',],lm(I((Q23-1)/4)~1))

mean.us.edu.fb.wtd<-svyglm(I((Q23-1)/4)~1, design= us.fb.design)
mean.us.edu.mt.wtd<-svyglm(I((Q23-1)/4)~1, design= us.mt.design)
mean.us.edu.qt.wtd<-svyglm(I((Q23-1)/4)~1, design= us.qt.design)

sd.us.edu.anes<-with(us.fb.anes[us.fb.anes$nontarg & us.fb.anes$sample==' ANES',],sd(I((Q23-1)/4),na.rm=T))

# Income
mean.us.income.gss<-with(us.fb.gss[us.fb.gss$nontarg & us.fb.gss$sample==' GSS',],lm(I((income_rev-1)/11)~1))
mean.us.income.fb<-with(us.fb.gss[us.fb.gss$nontarg & us.fb.gss$sample=='Facebook',],lm(I((income_rev-1)/11)~1))
mean.us.income.mt<-with(us.mt.gss[us.mt.gss$nontarg & us.mt.gss$sample=='MTurk',],lm(I((income_rev-1)/11)~1))
mean.us.income.qt<-with(us.qt.gss[us.qt.gss$nontarg & us.qt.gss$sample=='Qualtrics',],lm(I((income_rev-1)/11)~1))

mean.us.income.fb.wtd<-svyglm(I((income_rev-1)/11)~1, design= us.fb.design)
mean.us.income.mt.wtd<-svyglm(I((income_rev-1)/11)~1, design= us.mt.design)
mean.us.income.qt.wtd<-svyglm(I((income_rev-1)/11)~1, design= us.qt.design)

sd.us.income.gss<-with(us.fb.gss[us.fb.gss$nontarg & us.fb.gss$sample==' GSS',],sd(I((income_rev-1)/11),na.rm=T))

# Sex
mean.us.male.gss<-with(us.fb.gss[us.fb.gss$nontarg & us.fb.gss$sample==' GSS',],lm(male~1))
mean.us.male.fb<-with(us.fb.gss[us.fb.gss$nontarg & us.fb.gss$sample=='Facebook',],lm(male~1))
mean.us.male.mt<-with(us.mt.gss[us.mt.gss$nontarg & us.mt.gss$sample=='MTurk',],lm(male~1))
mean.us.male.qt<-with(us.qt.gss[us.qt.gss$nontarg & us.qt.gss$sample=='Qualtrics',],lm(male~1))

mean.us.male.fb.wtd<-svyglm(male~1, design= us.fb.design)
mean.us.male.mt.wtd<-svyglm(male~1, design= us.mt.design)
mean.us.male.qt.wtd<-svyglm(male~1, design= us.qt.design)

sd.us.male.gss<-with(us.fb.gss[us.fb.gss$nontarg & us.fb.gss$sample==' GSS',],sd(male,na.rm=T))

# Married
mean.us.married.gss<-with(us.fb.gss[us.fb.gss$nontarg & us.fb.gss$sample==' GSS',],lm(married~1))
mean.us.married.fb<-with(us.fb.gss[us.fb.gss$nontarg & us.fb.gss$sample=='Facebook',],lm(married~1))
mean.us.married.mt<-with(us.mt.gss[us.mt.gss$nontarg & us.mt.gss$sample=='MTurk',],lm(married~1))
mean.us.married.qt<-with(us.qt.gss[us.qt.gss$nontarg & us.qt.gss$sample=='Qualtrics',],lm(married~1))

mean.us.married.fb.wtd<-svyglm(married~1, design= us.fb.design)
mean.us.married.mt.wtd<-svyglm(married~1, design= us.mt.design)
mean.us.married.qt.wtd<-svyglm(married~1, design= us.qt.design)

sd.us.married.gss<-with(us.fb.gss[us.fb.gss$nontarg & us.fb.gss$sample==' GSS',],sd(married,na.rm=T))

# Religious
mean.us.religious.gss<-with(us.fb.gss[us.fb.gss$nontarg & us.fb.gss$sample==' GSS',],lm(religious~1))
mean.us.religious.fb<-with(us.fb.gss[us.fb.gss$nontarg & us.fb.gss$sample=='Facebook',],lm(religious~1))
mean.us.religious.mt<-with(us.mt.gss[us.mt.gss$nontarg & us.mt.gss$sample=='MTurk',],lm(religious~1))
mean.us.religious.qt<-with(us.qt.gss[us.qt.gss$nontarg & us.qt.gss$sample=='Qualtrics',],lm(religious~1))

mean.us.religious.fb.wtd<-svyglm(religious~1, design= us.fb.design)
mean.us.religious.mt.wtd<-svyglm(religious~1, design= us.mt.design)
mean.us.religious.qt.wtd<-svyglm(religious~1, design= us.qt.design)

sd.us.religious.gss<-with(us.fb.gss[us.fb.gss$nontarg & us.fb.gss$sample==' GSS',],sd(religious,na.rm=T))

# Hispanic
mean.us.hisp.gss<-with(us.fb.gss[us.fb.gss$nontarg & us.fb.gss$sample==' GSS',],lm(hisp~1))
mean.us.hisp.fb<-with(us.fb.gss[us.fb.gss$nontarg & us.fb.gss$sample=='Facebook',],lm(hisp~1))
mean.us.hisp.mt<-with(us.mt.gss[us.mt.gss$nontarg & us.mt.gss$sample=='MTurk',],lm(hisp~1))
mean.us.hisp.qt<-with(us.qt.gss[us.qt.gss$nontarg & us.qt.gss$sample=='Qualtrics',],lm(hisp~1))

mean.us.hisp.fb.wtd<-svyglm(hisp~1, design= us.fb.design)
mean.us.hisp.mt.wtd<-svyglm(hisp~1, design= us.mt.design)
mean.us.hisp.qt.wtd<-svyglm(hisp~1, design= us.qt.design)

sd.us.hisp.gss<-with(us.fb.gss[us.fb.gss$nontarg & us.fb.gss$sample==' GSS',],sd(hisp,na.rm=T))

# Black
mean.us.black.gss<-with(us.fb.gss[us.fb.gss$nontarg & us.fb.gss$sample==' GSS',],lm(black~1))
mean.us.black.fb<-with(us.fb.gss[us.fb.gss$nontarg & us.fb.gss$sample=='Facebook',],lm(black~1))
mean.us.black.mt<-with(us.mt.gss[us.mt.gss$nontarg & us.mt.gss$sample=='MTurk',],lm(black~1))
mean.us.black.qt<-with(us.qt.gss[us.qt.gss$nontarg & us.qt.gss$sample=='Qualtrics',],lm(black~1))

mean.us.black.fb.wtd<-svyglm(black~1, design= us.fb.design)
mean.us.black.mt.wtd<-svyglm(black~1, design= us.mt.design)
mean.us.black.qt.wtd<-svyglm(black~1, design= us.qt.design)

sd.us.black.gss<-with(us.fb.gss[us.fb.gss$nontarg & us.fb.gss$sample==' GSS',],sd(black,na.rm=T))

# White
mean.us.white.gss<-with(us.fb.gss[us.fb.gss$nontarg & us.fb.gss$sample==' GSS',],lm(white~1))
mean.us.white.fb<-with(us.fb.gss[us.fb.gss$nontarg & us.fb.gss$sample=='Facebook',],lm(white~1))
mean.us.white.mt<-with(us.mt.gss[us.mt.gss$nontarg & us.mt.gss$sample=='MTurk',],lm(white~1))
mean.us.white.qt<-with(us.qt.gss[us.qt.gss$nontarg & us.qt.gss$sample=='Qualtrics',],lm(white~1))

mean.us.white.fb.wtd<-svyglm(white~1, design= us.fb.design)
mean.us.white.mt.wtd<-svyglm(white~1, design= us.mt.design)
mean.us.white.qt.wtd<-svyglm(white~1, design= us.qt.design)

sd.us.white.gss<-with(us.fb.gss[us.fb.gss$nontarg & us.fb.gss$sample==' GSS',],sd(white,na.rm=T))

# Population density. Scaling based on empirical range in the population.
county_codes<-read.xlsx('ruralurbancodes2013.xlsx',sheet=1) # From http://www.ers.usda.gov/data-products/rural-urban-continuum-codes/.aspx
county_data<-read.delim('2014_Gaz_counties_national.txt',colClasses='character')
county_codes<-merge(county_codes,county_data[,c('GEOID','ALAND')],by.x='FIPS',by.y='GEOID')

dens.range.us<-range(log(county_codes$Population_2010/(as.numeric(county_codes$ALAND)/1000000)))

mean.us.density.cces<-with(us.fb.cces[us.fb.cces$nontarg & us.fb.cces$sample==' CCES',],lm(I((log(density)-dens.range.us[1])/(diff(dens.range.us)))~1))
mean.us.density.fb<-with(us.fb.cces[us.fb.cces$nontarg & us.fb.cces$sample=='Facebook',],lm(I((log(density)-dens.range.us[1])/(diff(dens.range.us)))~1))
mean.us.density.mt<-with(us.mt.cces[us.mt.cces$nontarg & us.mt.cces$sample=='MTurk',],lm(I((log(density)-dens.range.us[1])/(diff(dens.range.us)))~1))
mean.us.density.qt<-with(us.qt.cces[us.qt.cces$nontarg & us.qt.cces$sample=='Qualtrics',],lm(I((log(density)-dens.range.us[1])/(diff(dens.range.us)))~1))

# Helper function since svyglm requires all variables to be in design= argument
scaledensity<-function(x) (log(x)-dens.range.us[1])/(diff(dens.range.us))

mean.us.density.fb.wtd<-svyglm(scaledensity(density)~1, design= us.fb.design)
mean.us.density.mt.wtd<-svyglm(scaledensity(density)~1, design= us.mt.design)
mean.us.density.qt.wtd<-svyglm(scaledensity(density)~1, design= us.qt.design)

sd.us.density.cces<-with(us.fb.cces[us.fb.cces$nontarg & us.fb.cces$sample==' CCES',],sd(I((log(density)-dens.range.us[1])/(diff(dens.range.us))),na.rm=T))

# Gather coefficients

mean.us.rep<-sapply(list(mean.us.age.gss, mean.us.edu.anes, mean.us.income.gss, mean.us.male.gss, mean.us.married.gss, mean.us.religious.gss, mean.us.hisp.gss, mean.us.black.gss, mean.us.white.gss, mean.us.density.cces),coef)
mean.us.fb<-sapply(list(mean.us.age.fb, mean.us.edu.fb, mean.us.income.fb, mean.us.male.fb, mean.us.married.fb, mean.us.religious.fb, mean.us.hisp.fb, mean.us.black.fb, mean.us.white.fb, mean.us.density.fb),coef)
mean.us.mt<-sapply(list(mean.us.age.mt, mean.us.edu.mt, mean.us.income.mt, mean.us.male.mt, mean.us.married.mt, mean.us.religious.mt, mean.us.hisp.mt, mean.us.black.mt, mean.us.white.mt, mean.us.density.mt),coef)
mean.us.qt<-sapply(list(mean.us.age.qt, mean.us.edu.qt, mean.us.income.qt, mean.us.male.qt, mean.us.married.qt, mean.us.religious.qt, mean.us.hisp.qt, mean.us.black.qt, mean.us.white.qt, mean.us.density.qt),coef)

se.us.rep<-sapply(list(mean.us.age.gss, mean.us.edu.anes, mean.us.income.gss, mean.us.male.gss, mean.us.married.gss, mean.us.religious.gss, mean.us.hisp.gss, mean.us.black.gss, mean.us.white.gss, mean.us.density.cces), function(x) coef(summary(x))[2])
se.us.fb<-sapply(list(mean.us.age.fb, mean.us.edu.fb, mean.us.income.fb, mean.us.male.fb, mean.us.married.fb, mean.us.religious.fb, mean.us.hisp.fb, mean.us.black.fb, mean.us.white.fb, mean.us.density.fb), function(x) coef(summary(x))[2])
se.us.mt<-sapply(list(mean.us.age.mt, mean.us.edu.mt, mean.us.income.mt, mean.us.male.mt, mean.us.married.mt, mean.us.religious.mt, mean.us.hisp.mt, mean.us.black.mt, mean.us.white.mt, mean.us.density.mt), function(x) coef(summary(x))[2])
se.us.qt<-sapply(list(mean.us.age.qt, mean.us.edu.qt, mean.us.income.qt, mean.us.male.qt, mean.us.married.qt, mean.us.religious.qt, mean.us.hisp.qt, mean.us.black.qt, mean.us.white.qt, mean.us.density.qt), function(x) coef(summary(x))[2])

mean.us.fb.wtd<-sapply(list(mean.us.age.fb.wtd, mean.us.edu.fb.wtd, mean.us.income.fb.wtd, mean.us.male.fb.wtd, mean.us.married.fb.wtd, mean.us.religious.fb.wtd, mean.us.hisp.fb.wtd, mean.us.black.fb.wtd, mean.us.white.fb.wtd, mean.us.density.fb.wtd),coef)
mean.us.mt.wtd<-sapply(list(mean.us.age.mt.wtd, mean.us.edu.mt.wtd, mean.us.income.mt.wtd, mean.us.male.mt.wtd, mean.us.married.mt.wtd, mean.us.religious.mt.wtd, mean.us.hisp.mt.wtd, mean.us.black.mt.wtd, mean.us.white.mt.wtd, mean.us.density.mt.wtd),coef)
mean.us.qt.wtd<-sapply(list(mean.us.age.qt.wtd, mean.us.edu.qt.wtd, mean.us.income.qt.wtd, mean.us.male.qt.wtd, mean.us.married.qt.wtd, mean.us.religious.qt.wtd, mean.us.hisp.qt.wtd, mean.us.black.qt.wtd, mean.us.white.qt.wtd, mean.us.density.qt.wtd),coef)
se.us.fb.wtd<-sapply(list(mean.us.age.fb.wtd, mean.us.edu.fb.wtd, mean.us.income.fb.wtd, mean.us.male.fb.wtd, mean.us.married.fb.wtd, mean.us.religious.fb.wtd, mean.us.hisp.fb.wtd, mean.us.black.fb.wtd, mean.us.white.fb.wtd, mean.us.density.fb.wtd), function(x) coef(summary(x))[2])
se.us.mt.wtd<-sapply(list(mean.us.age.mt.wtd, mean.us.edu.mt.wtd, mean.us.income.mt.wtd, mean.us.male.mt.wtd, mean.us.married.mt.wtd, mean.us.religious.mt.wtd, mean.us.hisp.mt.wtd, mean.us.black.mt.wtd, mean.us.white.mt.wtd, mean.us.density.mt.wtd), function(x) coef(summary(x))[2])
se.us.qt.wtd<-sapply(list(mean.us.age.qt.wtd, mean.us.edu.qt.wtd, mean.us.income.qt.wtd, mean.us.male.qt.wtd, mean.us.married.qt.wtd, mean.us.religious.qt.wtd, mean.us.hisp.qt.wtd, mean.us.black.qt.wtd, mean.us.white.qt.wtd, mean.us.density.qt.wtd), function(x) coef(summary(x))[2])

diff.us.fb<-abs(mean.us.fb-mean.us.rep)
diff.us.mt<-abs(mean.us.mt-mean.us.rep)
diff.us.qt<-abs(mean.us.qt-mean.us.rep)
which.closest<-apply(cbind(diff.us.fb,diff.us.mt,diff.us.qt),1, which.min)
col.us.fb<-ifelse(which.closest==1,'red','black')
col.us.mt<-ifelse(which.closest==2,'red','black')
col.us.qt<-ifelse(which.closest==3,'red','black')

sd.us.rep<-c(sd.us.age.gss, sd.us.edu.anes, sd.us.income.gss, sd.us.male.gss, sd.us.married.gss, sd.us.religious.gss, sd.us.hisp.gss, sd.us.black.gss, sd.us.white.gss, sd.us.density.cces)

std.diff.us.fb<-(mean.us.fb-mean.us.rep)/sd.us.rep
std.diff.us.mt<-(mean.us.mt-mean.us.rep)/sd.us.rep
std.diff.us.qt<-(mean.us.qt-mean.us.rep)/sd.us.rep
std.diff.us.fb.wtd<-(mean.us.fb.wtd-mean.us.rep)/sd.us.rep
std.diff.us.mt.wtd<-(mean.us.mt.wtd-mean.us.rep)/sd.us.rep
std.diff.us.qt.wtd<-(mean.us.qt.wtd-mean.us.rep)/sd.us.rep

# Standardized difference table (Appendix Table 8)

us.stddiff.demog.table<-round(data.frame(fb= std.diff.us.fb, fb.wtd= std.diff.us.fb.wtd, mt= std.diff.us.mt, mt.wtd= std.diff.us.mt.wtd, qt= std.diff.us.qt, qt.wtd= std.diff.us.qt.wtd),2)
rownames(us.stddiff.demog.table)<-c('Age','Education','Income','Male','Married','Religious','Hispanic','Black','White','Log Pop. Density')
colnames(us.stddiff.demog.table)<-c('Facebook','Facebook\nWeighted','MTurk','MTurk\nWeighted','Qualtrics','Qualtrics\nWeighted')

us.stddiff.demog.table.note<-'NOTE: Entries are mean differences between the convenience and probability sample figures listed in Table \\ref{us.demog.table}, divided by the standard deviation in the probability sample.'

us.stddiff.demog.table.latex<-latex(us.stddiff.demog.table,file='us_stddiff_demog_table.tex', rowlabel = '', collabel.just=rep('c',ncol(us.stddiff.demog.table)), col.just=rep('c',ncol(us.stddiff.demog.table)), caption = 'U.S. Demographics: Standardized Difference from Probability Samples', extracolsize='normalsize', insert.bottom= us.stddiff.demog.table.note, booktabs = F, ctable = T, where = "htp")

# Plot (Main Text Figure 1)

lwd<-1.5
adjust<-0.15

pdf(file='us_demog.pdf',width=10,height=7)
par(mfrow=c(1,2))

par(mar=c(3,6,2,0)+.01)
plot(mean.us.rep, 10:1+1.5*adjust, type='p',axes=F,frame.plot=T,xlab='',ylab='',pch=19,xlim=0:1,ylim=c(1-1.5*adjust,10+1.5*adjust),main='Unweighted',sub='')
Axis(side=1,at=seq(0,1,.25),labels= seq(0,1,.25))
Axis(side=2,at=c(11:0),labels=c('','Age','Education','Income','Male','Married','Religious','Hispanic','Black','White','Population\nDensity',''),las=1)
abline(h=1:9+.5,lwd=1,lty=3)
segments(mean.us.rep-qnorm(.975)*se.us.rep,10:1+1.5*adjust, mean.us.rep+qnorm(.975)*se.us.rep,10:1+1.5*adjust,lwd=lwd)
segments(mean.us.fb-qnorm(.975)*se.us.fb,10:1+0.5*adjust, mean.us.fb+qnorm(.975)*se.us.fb,10:1+0.5*adjust,lwd=lwd)
segments(mean.us.mt-qnorm(.975)*se.us.mt,10:1-0.5*adjust, mean.us.mt+qnorm(.975)*se.us.mt,10:1-0.5*adjust,lwd=lwd)
segments(mean.us.qt-qnorm(.975)*se.us.qt,10:1-1.5*adjust, mean.us.qt+qnorm(.975)*se.us.qt,10:1-1.5*adjust,lwd=lwd)
points(mean.us.fb, 10:1+0.5*adjust, pch=21, bg='white')
points(mean.us.mt, 10:1-0.5*adjust, pch=8)
points(mean.us.qt, 10:1-1.5*adjust, pch=25, bg='white')

par(mar=c(3,2,2,4)+.01)
plot(mean.us.rep, 10:1+1.5*adjust, type='p',axes=F,frame.plot=T,xlab='',ylab='',pch=19,xlim=0:1,ylim=c(1-1.5*adjust,10+1.5*adjust),main='Weighted',sub='')
Axis(side=1,at=seq(0,1,.25),labels= seq(0,1,.25))
abline(h=1:9+.5,lwd=1,lty=3)
segments(mean.us.rep-qnorm(.975)*se.us.rep,10:1+1.5*adjust, mean.us.rep+qnorm(.975)*se.us.rep,10:1+1.5*adjust,lwd=lwd)
segments(mean.us.fb.wtd-qnorm(.975)*se.us.fb.wtd,10:1+0.5*adjust, mean.us.fb.wtd+qnorm(.975)*se.us.fb.wtd,10:1+0.5*adjust,lwd=lwd)
segments(mean.us.mt.wtd-qnorm(.975)*se.us.mt.wtd,10:1-0.5*adjust, mean.us.mt.wtd+qnorm(.975)*se.us.mt.wtd,10:1-0.5*adjust,lwd=lwd)
segments(mean.us.qt.wtd-qnorm(.975)*se.us.qt.wtd,10:1-1.5*adjust, mean.us.qt.wtd+qnorm(.975)*se.us.qt.wtd,10:1-1.5*adjust,lwd=lwd)
points(mean.us.fb.wtd, 10:1+0.5*adjust, pch=21, bg='white')
points(mean.us.mt.wtd, 10:1-0.5*adjust, pch=8)
points(mean.us.qt.wtd, 10:1-1.5*adjust, pch=25, bg='white')
legend('bottomleft',pch=c(19,21,8,25),legend=c('Probability','Facebook','MTurk','Qualtrics'),title='Sample',bg='white')

dev.off()

#######################################
# Appendix Table 4: Indian Demographics
#######################################

# Generate stacked datasets and weighted design objects for each comparison

india.fb.wvs<-rbind(india1[india1$sample=='Facebook',c('sample','weight','Q21')],data.frame(sample=' WVS', weight=1, Q21= unclass(wvs$V248_CS)-280))
india.fb.wvs.design<-svydesign(ids=~1,weights= india.fb.wvs$weight,data=india.fb.wvs)
india.mt.wvs<-rbind(india1[india1$sample=='MTurk',c('sample','weight','Q21')],data.frame(sample=' WVS', weight=1, Q21= unclass(wvs$V248_CS)-280))
india.mt.wvs.design<-svydesign(ids=~1,weights= india.mt.wvs$weight,data=india.mt.wvs)
india.qt.wvs<-rbind(india1[india1$sample=='Qualtrics',c('sample','weight','Q21')],data.frame(sample=' WVS', weight=1, Q21= unclass(wvs$V248_CS)-280))
india.qt.wvs.design<-svydesign(ids=~1,weights= india.qt.wvs$weight,data=india.qt.wvs)

nes_age<-c(rep(1,3518),rep(2,5925),rep(3,5010),rep(4,3672),rep(5,4116),rep(NA,60))
nes_edu<-c(rep(0,5583),rep(1,1716),rep(2,2842),rep(3,2875),rep(4,3222),rep(5,2717),rep(6,2177),rep(7,645),rep(8,294),rep(NA,230))
nes_inc<-c(rep(1,2582),rep(2,2419),rep(3,2797),rep(4,1845),rep(5,2542),rep(6,5116),rep(7,2873),rep(8,2126),NA)
nes_male<-c(rep(1,11860),rep(0,10441))
nes_married<-c(rep(1,17320+478),rep(0, 20951-17320-478),rep(NA,22301-20951)) # Marital status from 2014 NES pre-election survey. Padding out end of vector with NAs to have final length same as others from post-election survey.
nes_lowercaste<-c(rep(1, 4452+1692+9404),rep(0, 6618+134),NA)

india1$age<-1*(india1$Q3 <=25) + 2*(india1$Q3 >= 26 & india1$Q3 <=35) + 3*(india1$Q3 >= 36 & india1$Q3 <=45) + 4*(india1$Q3 >= 46 & india1$Q3 <=55) + 5*(india1$Q3 >= 56)
india1$income<-ifelse(india1$Q28==9, NA, india1$Q28)
india1$male<-india1$Q20==1
india1$married<-india1$Q27==2
india1$lowercaste<-india1$Q22 %in% 1:3

india.fb.nes<-rbind(india1[india1$sample=='Facebook',c('sample','weight','age','income','male','Q21','lowercaste','married','density','pop')],data.frame(sample=' NES', weight=1, age=nes_age, income=nes_inc, male=nes_male, Q21=nes_edu, lowercaste=nes_lowercaste, married=nes_married, density=NA, pop=NA))
india.fb.nes.design<-svydesign(ids=~1,weights= india.fb.nes$weight,data=india.fb.nes)
india.mt.nes<-rbind(india1[india1$sample=='MTurk',c('sample','weight','age','income','male','Q21','lowercaste','married','density','pop')],data.frame(sample=' NES', weight=1, age=nes_age, income=nes_inc, male=nes_male, Q21=nes_edu, lowercaste=nes_lowercaste, married=nes_married, density=NA, pop=NA))
india.mt.nes.design<-svydesign(ids=~1,weights= india.mt.nes$weight,data=india.mt.nes)
india.qt.nes<-rbind(india1[india1$sample=='Qualtrics',c('sample','weight','age','income','male','Q21','lowercaste','married','density','pop')],data.frame(sample=' NES', weight=1, age=nes_age, income=nes_inc, male=nes_male, Q21=nes_edu, lowercaste=nes_lowercaste, married=nes_married, density=NA, pop=NA))
india.qt.nes.design<-svydesign(ids=~1,weights= india.qt.nes$weight,data=india.qt.nes)

# Education
reg.india.edu.fb.wvs<-lm(india.fb.wvs$Q21~ india.fb.wvs$sample)
reg.india.edu.fb.wvs.wtd<-svyglm(Q21~sample, design=india.fb.wvs.design)
reg.india.edu.fb.nes<-lm(india.fb.nes$Q21~ india.fb.nes$sample)
reg.india.edu.fb.nes.wtd<-svyglm(Q21~sample, design=india.fb.nes.design)

reg.india.edu.mt.wvs<-lm(india.mt.wvs$Q21~ india.mt.wvs$sample)
reg.india.edu.mt.wvs.wtd<-svyglm(Q21~sample, design=india.mt.wvs.design)
reg.india.edu.mt.nes<-lm(india.mt.nes$Q21~ india.mt.nes$sample)
reg.india.edu.mt.nes.wtd<-svyglm(Q21~sample, design=india.mt.nes.design)

reg.india.edu.qt.wvs<-lm(india.qt.wvs$Q21~ india.qt.wvs$sample)
reg.india.edu.qt.wvs.wtd<-svyglm(Q21~sample, design=india.qt.wvs.design)
reg.india.edu.qt.nes<-lm(india.qt.nes$Q21~ india.qt.nes$sample)
reg.india.edu.qt.nes.wtd<-svyglm(Q21~sample, design=india.qt.nes.design)

# Age
reg.india.age.fb<-lm(india.fb.nes$age~ india.fb.nes$sample)
reg.india.age.fb.wtd<-svyglm(age~sample, design= india.fb.nes.design)
reg.india.age.mt<-lm(india.mt.nes$age~ india.mt.nes$sample)
reg.india.age.mt.wtd<-svyglm(age~sample, design= india.mt.nes.design)
reg.india.age.qt<-lm(india.qt.nes$age~ india.qt.nes$sample)
reg.india.age.qt.wtd<-svyglm(age~sample, design= india.qt.nes.design)

# Income
reg.india.income.fb<-lm(india.fb.nes$income~ india.fb.nes$sample)
reg.india.income.fb.wtd<-svyglm(income~sample, design= india.fb.nes.design)
reg.india.income.mt<-lm(india.mt.nes$income~ india.mt.nes$sample)
reg.india.income.mt.wtd<-svyglm(income~sample, design= india.mt.nes.design)
reg.india.income.qt<-lm(india.qt.nes$income~ india.qt.nes$sample)
reg.india.income.qt.wtd<-svyglm(income~sample, design= india.qt.nes.design)

# Sex
reg.india.male.fb<-lm(india.fb.nes$male~ india.fb.nes$sample)
reg.india.male.fb.wtd<-svyglm(male~sample, design= india.fb.nes.design)
reg.india.male.mt<-lm(india.mt.nes$male~ india.mt.nes$sample)
reg.india.male.mt.wtd<-svyglm(male~sample, design= india.mt.nes.design)
reg.india.male.qt<-lm(india.qt.nes$male~ india.qt.nes$sample)
reg.india.male.qt.wtd<-svyglm(male~sample, design= india.qt.nes.design)

# Marital status (NOTE: we used the census wording for this one)
reg.india.married.fb<-lm(india.fb.nes$married~ india.fb.nes$sample)
reg.india.married.fb.wtd<-svyglm(married~sample, design= india.fb.nes.design)
reg.india.married.mt<-lm(india.mt.nes$married~ india.mt.nes$sample)
reg.india.married.mt.wtd<-svyglm(married~sample, design= india.mt.nes.design)
reg.india.married.qt<-lm(india.qt.nes$married~ india.qt.nes$sample)
reg.india.married.qt.wtd<-svyglm(married~sample, design= india.qt.nes.design)

# Caste
reg.india.lowercaste.fb<-lm(india.fb.nes$lowercaste~ india.fb.nes$sample)
reg.india.lowercaste.fb.wtd<-svyglm(lowercaste~sample, design= india.fb.nes.design)
reg.india.lowercaste.mt<-lm(india.mt.nes$lowercaste~ india.mt.nes$sample)
reg.india.lowercaste.mt.wtd<-svyglm(lowercaste~sample, design= india.mt.nes.design)
reg.india.lowercaste.qt<-lm(india.qt.nes$lowercaste~ india.qt.nes$sample)
reg.india.lowercaste.qt.wtd<-svyglm(lowercaste~sample, design= india.qt.nes.design)

# Population density (one-sample because we don't have District for the NES)
reg.india.density.fb<-lm(I(log(india.fb.nes$density))~ 1)
reg.india.density.fb.wtd<-svyglm(I(log(density))~1, design= india.fb.nes.design)
reg.india.density.mt<-lm(I(log(india.mt.nes$density))~ 1)
reg.india.density.mt.wtd<-svyglm(I(log(density))~1, design= india.mt.nes.design)
reg.india.density.qt<-lm(I(log(india.qt.nes$density))~ 1)
reg.india.density.qt.wtd<-svyglm(I(log(density))~1, design= india.qt.nes.design)

# Population (one-sample because we don't have District for the NES)
reg.india.pop.fb<-lm(I(log(india.fb.nes$pop))~ 1)
reg.india.pop.fb.wtd<-svyglm(I(log(pop))~1, design= india.fb.nes.design)
reg.india.pop.mt<-lm(I(log(india.mt.nes$pop))~ 1)
reg.india.pop.mt.wtd<-svyglm(I(log(pop))~1, design= india.mt.nes.design)
reg.india.pop.qt<-lm(I(log(india.qt.nes$pop))~ 1)
reg.india.pop.qt.wtd<-svyglm(I(log(pop))~1, design= india.qt.nes.design)

# Gather coefficients for table

est.india.nes<-round(c(sapply(list(reg.india.edu.fb.nes, reg.india.age.fb, reg.india.income.fb, reg.india.male.fb, reg.india.married.fb, reg.india.lowercaste.fb), function(x) coef(x)[1]), NA, NA),2)
est.india.fb<-round(sapply(list(reg.india.edu.fb.nes, reg.india.age.fb, reg.india.income.fb, reg.india.male.fb, reg.india.married.fb, reg.india.lowercaste.fb, reg.india.density.fb, reg.india.pop.fb), function(x) sum(coef(x))),2)
pval.india.fb<-sapply(list(reg.india.edu.fb.nes, reg.india.age.fb, reg.india.income.fb, reg.india.male.fb, reg.india.married.fb, reg.india.lowercaste.fb, reg.india.density.fb, reg.india.pop.fb), pval)
est.india.fb.wtd<-round(sapply(list(reg.india.edu.fb.nes.wtd, reg.india.age.fb.wtd, reg.india.income.fb.wtd, reg.india.male.fb.wtd, reg.india.married.fb.wtd, reg.india.lowercaste.fb.wtd, reg.india.density.fb.wtd, reg.india.pop.fb.wtd), function(x) sum(coef(x))),2)
pval.india.fb.wtd<-sapply(list(reg.india.edu.fb.nes.wtd, reg.india.age.fb.wtd, reg.india.income.fb.wtd, reg.india.male.fb.wtd, reg.india.married.fb.wtd, reg.india.lowercaste.fb.wtd, reg.india.density.fb.wtd, reg.india.pop.fb.wtd), pval)
est.india.mt<-round(sapply(list(reg.india.edu.mt.nes, reg.india.age.mt, reg.india.income.mt, reg.india.male.mt, reg.india.married.mt, reg.india.lowercaste.mt, reg.india.density.mt, reg.india.pop.mt), function(x) sum(coef(x))),2)
pval.india.mt<-sapply(list(reg.india.edu.mt.nes, reg.india.age.mt, reg.india.income.mt, reg.india.male.mt, reg.india.married.mt, reg.india.lowercaste.mt, reg.india.density.mt, reg.india.pop.mt), pval)
est.india.mt.wtd<-round(sapply(list(reg.india.edu.mt.nes.wtd, reg.india.age.mt.wtd, reg.india.income.mt.wtd, reg.india.male.mt.wtd, reg.india.married.mt.wtd, reg.india.lowercaste.mt.wtd, reg.india.density.mt.wtd, reg.india.pop.mt.wtd), function(x) sum(coef(x))),2)
pval.india.mt.wtd<-sapply(list(reg.india.edu.mt.nes.wtd, reg.india.age.mt.wtd, reg.india.income.mt.wtd, reg.india.male.mt.wtd, reg.india.married.mt.wtd, reg.india.lowercaste.mt.wtd, reg.india.density.mt.wtd, reg.india.pop.mt.wtd), pval)
est.india.qt<-round(sapply(list(reg.india.edu.qt.nes, reg.india.age.qt, reg.india.income.qt, reg.india.male.qt, reg.india.married.qt, reg.india.lowercaste.qt, reg.india.density.qt, reg.india.pop.qt), function(x) sum(coef(x))),2)
pval.india.qt<-sapply(list(reg.india.edu.qt.nes, reg.india.age.qt, reg.india.income.qt, reg.india.male.qt, reg.india.married.qt, reg.india.lowercaste.qt, reg.india.density.qt, reg.india.pop.qt), pval)
est.india.qt.wtd<-round(sapply(list(reg.india.edu.qt.nes.wtd, reg.india.age.qt.wtd, reg.india.income.qt.wtd, reg.india.male.qt.wtd, reg.india.married.qt.wtd, reg.india.lowercaste.qt.wtd, reg.india.density.qt.wtd, reg.india.pop.qt.wtd), function(x) sum(coef(x))),2)
pval.india.qt.wtd<-sapply(list(reg.india.edu.qt.nes.wtd, reg.india.age.qt.wtd, reg.india.income.qt.wtd, reg.india.male.qt.wtd, reg.india.married.qt.wtd, reg.india.lowercaste.qt.wtd, reg.india.density.qt.wtd, reg.india.pop.qt.wtd), pval)

pval.adj.india<-p.adjust(c(pval.india.fb, pval.india.fb.wtd, pval.india.mt, pval.india.mt.wtd, pval.india.qt, pval.india.qt.wtd),method='BH')
stars.india<-data.frame(matrix(stars(pval.adj.india),nrow=length(est.india.nes)))
names(stars.india)<-c('fb','fb.wtd','mt','mt.wtd','qt','qt.wtd')

india.demog.table<-data.frame(nes=as.character(est.india.nes),fb=paste(est.india.fb,stars.india$fb,sep=''), fb.wtd=paste(est.india.fb.wtd,stars.india$fb.wtd,sep=''), mt=paste(est.india.mt,stars.india$mt,sep=''), mt.wtd=paste(est.india.mt.wtd,stars.india$mt.wtd,sep=''), qt=paste(est.india.qt,stars.india$qt,sep=''), qt.wtd=paste(est.india.qt.wtd,stars.india$qt.wtd,sep=''),stringsAsFactors=F)
rownames(india.demog.table)<-c('Education (0--8)','Age Range (1--5)','Income (1--8)','Male','Married','Lower Caste','Log Pop. Density','Log Population')
colnames(india.demog.table)<-c('Probability\nSample','Facebook','Facebook\nWeighted','MTurk','MTurk\nWeighted','Qualtrics','Qualtrics\nWeighted')

# Bold results that are consistent with hypotheses; underline results that go against.
india.hypo.table<-matrix(
	c(NA,T,NA,T,NA,NA,NA,
	NA,T,NA,T,NA,NA,NA,
	NA,NA,NA,T,NA,NA,NA,
	NA,T,NA,T,NA,NA,NA,
	NA,T,NA,T,NA,NA,NA,
	NA,NA,NA,T,NA,NA,NA,
	NA,NA,NA,NA,NA,NA,NA,
	NA,NA,NA,NA,NA,NA,NA), byrow=T,nrow=length(est.india.nes))

for (i in 1:nrow(india.demog.table)){
	for (j in 1:ncol(india.demog.table)){
		if (is.na(india.hypo.table[i,j])) next
		india.demog.table[i,j]<-ifelse(india.hypo.table[i,j], paste0('\\textbf{', india.demog.table[i,j],'}'), paste0('\\underline{', india.demog.table[i,j],'}'))
	}
}

india.demog.table.note<-"NOTE: Entries are mean or weighted mean values for each sample. Probability sample is the 2014 Indian National Election Study (pre-poll for Married; post-poll for other variables). Population and population density are those of the District associated with the respondent's PIN code, in residents per square kilometer. Weights are post-stratification, based on region (5 categories), age (above/below median), and sex in the 2011 census. Stars report significance levels from two-tailed difference-in-means t-tests comparing the probability and convenience sample quantities, with adjustment for multiple comparisons using the Benjamini-Hochberg method (all hypotheses in the table are a single family). Results in bold type support pre-registered hypotheses, underlined results contradict pre-registered hypotheses, and those in regular font were not pre-registered. $\\dagger$ p$<$0.1, *p$<$0.05, **p$<$0.01, ***p$<$0.001."

india.demog.table.latex<-latex(india.demog.table,file='india_demog_table.tex', rowlabel = '', collabel.just=rep('c',ncol(india.demog.table)), col.just=rep('c',ncol(india.demog.table)), caption = 'Indian Demographics: Convenience versus Probability Samples', extracolsize='normalsize', insert.bottom= india.demog.table.note, booktabs = F, ctable = T, where = "htp")

##############################################################
# Main Text Figure 2 and Appendix Table 9: Indian Demographics
##############################################################

# Weighted design objects, convenience samples only
india.fb.design<-svydesign(ids=~1,weights= india1$weight[india1$sample=='Facebook'],data= india1[india1$sample=='Facebook',])
india.mt.design<-svydesign(ids=~1,weights= india1$weight[india1$sample=='MTurk'],data= india1[india1$sample=='MTurk',])
india.qt.design<-svydesign(ids=~1,weights= india1$weight[india1$sample=='Qualtrics'],data= india1[india1$sample=='Qualtrics',])

# Education
mean.india.edu.nes<-with(india.fb.nes[india.fb.nes$sample==' NES',],lm(I(Q21/8)~1))
mean.india.edu.fb<-with(india.fb.nes[india.fb.nes$sample=='Facebook',],lm(I(Q21/8)~1))
mean.india.edu.mt<-with(india.mt.nes[india.mt.nes$sample=='MTurk',],lm(I(Q21/8)~1))
mean.india.edu.qt<-with(india.qt.nes[india.qt.nes$sample=='Qualtrics',],lm(I(Q21/8)~1))

mean.india.edu.fb.wtd<-svyglm(I(Q21/8)~1, design= india.fb.design)
mean.india.edu.mt.wtd<-svyglm(I(Q21/8)~1, design= india.mt.design)
mean.india.edu.qt.wtd<-svyglm(I(Q21/8)~1, design= india.qt.design)

sd.india.edu.nes<-with(india.fb.nes[india.fb.nes$sample==' NES',],sd(I(Q21/8),na.rm=T))

# Age
mean.india.age.nes<-with(india.fb.nes[india.fb.nes$sample==' NES',],lm(I((age-1)/4)~1))
mean.india.age.fb<-with(india.fb.nes[india.fb.nes$sample=='Facebook',],lm(I((age-1)/4)~1))
mean.india.age.mt<-with(india.mt.nes[india.mt.nes$sample=='MTurk',],lm(I((age-1)/4)~1))
mean.india.age.qt<-with(india.qt.nes[india.qt.nes$sample=='Qualtrics',],lm(I((age-1)/4)~1))

mean.india.age.fb.wtd<-svyglm(I((age-1)/4)~1, design= india.fb.design)
mean.india.age.mt.wtd<-svyglm(I((age-1)/4)~1, design= india.mt.design)
mean.india.age.qt.wtd<-svyglm(I((age-1)/4)~1, design= india.qt.design)

sd.india.age.nes<-with(india.fb.nes[india.fb.nes$sample==' NES',],sd(I((age-1)/4),na.rm=T))

# Income
mean.india.income.nes<-with(india.fb.nes[india.fb.nes$sample==' NES',],lm(I((income-1)/7)~1))
mean.india.income.fb<-with(india.fb.nes[india.fb.nes$sample=='Facebook',],lm(I((income-1)/7)~1))
mean.india.income.mt<-with(india.mt.nes[india.mt.nes$sample=='MTurk',],lm(I((income-1)/7)~1))
mean.india.income.qt<-with(india.qt.nes[india.qt.nes$sample=='Qualtrics',],lm(I((income-1)/7)~1))

mean.india.income.fb.wtd<-svyglm(I((income-1)/7)~1, design= india.fb.design)
mean.india.income.mt.wtd<-svyglm(I((income-1)/7)~1, design= india.mt.design)
mean.india.income.qt.wtd<-svyglm(I((income-1)/7)~1, design= india.qt.design)

sd.india.income.nes<-with(india.fb.nes[india.fb.nes$sample==' NES',],sd(I((income-1)/7),na.rm=T))

# Sex
mean.india.male.nes<-with(india.fb.nes[india.fb.nes$sample==' NES',],lm(male~1))
mean.india.male.fb<-with(india.fb.nes[india.fb.nes$sample=='Facebook',],lm(male~1))
mean.india.male.mt<-with(india.mt.nes[india.mt.nes$sample=='MTurk',],lm(male~1))
mean.india.male.qt<-with(india.qt.nes[india.qt.nes$sample=='Qualtrics',],lm(male~1))

mean.india.male.fb.wtd<-svyglm(male~1, design= india.fb.design)
mean.india.male.mt.wtd<-svyglm(male~1, design= india.mt.design)
mean.india.male.qt.wtd<-svyglm(male~1, design= india.qt.design)

sd.india.male.nes<-with(india.fb.nes[india.fb.nes$sample==' NES',],sd(male))

# Marital status
mean.india.married.nes<-with(india.fb.nes[india.fb.nes$sample==' NES',],lm(married~1))
mean.india.married.fb<-with(india.fb.nes[india.fb.nes$sample=='Facebook',],lm(married~1))
mean.india.married.mt<-with(india.mt.nes[india.mt.nes$sample=='MTurk',],lm(married~1))
mean.india.married.qt<-with(india.qt.nes[india.qt.nes$sample=='Qualtrics',],lm(married~1))

mean.india.married.fb.wtd<-svyglm(married~1, design= india.fb.design)
mean.india.married.mt.wtd<-svyglm(married~1, design= india.mt.design)
mean.india.married.qt.wtd<-svyglm(married~1, design= india.qt.design)

sd.india.married.nes<-with(india.fb.nes[india.fb.nes$sample==' NES',],sd(married,na.rm=T))

# Caste
mean.india.lowercaste.nes<-with(india.fb.nes[india.fb.nes$sample==' NES',],lm(lowercaste~1))
mean.india.lowercaste.fb<-with(india.fb.nes[india.fb.nes$sample=='Facebook',],lm(lowercaste~1))
mean.india.lowercaste.mt<-with(india.mt.nes[india.mt.nes$sample=='MTurk',],lm(lowercaste~1))
mean.india.lowercaste.qt<-with(india.qt.nes[india.qt.nes$sample=='Qualtrics',],lm(lowercaste~1))

mean.india.lowercaste.fb.wtd<-svyglm(lowercaste~1, design= india.fb.design)
mean.india.lowercaste.mt.wtd<-svyglm(lowercaste~1, design= india.mt.design)
mean.india.lowercaste.qt.wtd<-svyglm(lowercaste~1, design= india.qt.design)

sd.india.lowercaste.nes<-with(india.fb.nes[india.fb.nes$sample==' NES',],sd(lowercaste, na.rm=T))

# Population density. Scaling based on empirical range in the population.
load('census_pop_districts.RData')
dens.range<-range(log((districts$Total.Population.Person/districts$Area)[districts$Total.Population.Person!=0 & districts$Area != 0]),na.rm=T)

mean.india.density.fb<-with(india.fb.nes[india.fb.nes$sample=='Facebook',],lm(I((log(density)-dens.range[1])/diff(dens.range))~1))
mean.india.density.mt<-with(india.mt.nes[india.mt.nes$sample=='MTurk',],lm(I((log(density)-dens.range[1])/diff(dens.range))~1))
mean.india.density.qt<-with(india.qt.nes[india.qt.nes$sample=='Qualtrics',],lm(I((log(density)-dens.range[1])/diff(dens.range))~1))

# Helper function since svyglm requires all variables to be in design= argument
scaledensity_in<-function(x) (log(x)-dens.range[1])/(diff(dens.range))

mean.india.density.fb.wtd<-svyglm(scaledensity_in(density)~1, design= india.fb.design)
mean.india.density.mt.wtd<-svyglm(scaledensity_in(density)~1, design= india.mt.design)
mean.india.density.qt.wtd<-svyglm(scaledensity_in(density)~1, design= india.qt.design)

# Gather coefficients

mean.india.nes<-c(sapply(list(mean.india.edu.nes, mean.india.age.nes, mean.india.income.nes, mean.india.male.nes, mean.india.married.nes, mean.india.lowercaste.nes),coef),NA)
mean.india.fb<-sapply(list(mean.india.edu.fb, mean.india.age.fb, mean.india.income.fb, mean.india.male.fb, mean.india.married.fb, mean.india.lowercaste.fb, mean.india.density.fb),coef)
mean.india.mt<-sapply(list(mean.india.edu.mt, mean.india.age.mt, mean.india.income.mt, mean.india.male.mt, mean.india.married.mt, mean.india.lowercaste.mt, mean.india.density.mt),coef)
mean.india.qt<-sapply(list(mean.india.edu.qt, mean.india.age.qt, mean.india.income.qt, mean.india.male.qt, mean.india.married.qt, mean.india.lowercaste.qt, mean.india.density.qt),coef)

se.india.nes<-c(sapply(list(mean.india.edu.nes, mean.india.age.nes, mean.india.income.nes, mean.india.male.nes, mean.india.married.nes, mean.india.lowercaste.nes),function(x) coef(summary(x))[2]),NA)
se.india.fb<-sapply(list(mean.india.edu.fb, mean.india.age.fb, mean.india.income.fb, mean.india.male.fb, mean.india.married.fb, mean.india.lowercaste.fb, mean.india.density.fb),function(x) coef(summary(x))[2])
se.india.mt<-sapply(list(mean.india.edu.mt, mean.india.age.mt, mean.india.income.mt, mean.india.male.mt, mean.india.married.mt, mean.india.lowercaste.mt, mean.india.density.mt),function(x) coef(summary(x))[2])
se.india.qt<-sapply(list(mean.india.edu.qt, mean.india.age.qt, mean.india.income.qt, mean.india.male.qt, mean.india.married.qt, mean.india.lowercaste.qt, mean.india.density.qt),function(x) coef(summary(x))[2])

mean.india.fb.wtd<-sapply(list(mean.india.edu.fb.wtd, mean.india.age.fb.wtd, mean.india.income.fb.wtd, mean.india.male.fb.wtd, mean.india.married.fb.wtd, mean.india.lowercaste.fb.wtd, mean.india.density.fb.wtd),coef)
mean.india.mt.wtd<-sapply(list(mean.india.edu.mt.wtd, mean.india.age.mt.wtd, mean.india.income.mt.wtd, mean.india.male.mt.wtd, mean.india.married.mt.wtd, mean.india.lowercaste.mt.wtd, mean.india.density.mt.wtd),coef)
mean.india.qt.wtd<-sapply(list(mean.india.edu.qt.wtd, mean.india.age.qt.wtd, mean.india.income.qt.wtd, mean.india.male.qt.wtd, mean.india.married.qt.wtd, mean.india.lowercaste.qt.wtd, mean.india.density.qt.wtd),coef)
se.india.fb.wtd<-sapply(list(mean.india.edu.fb.wtd, mean.india.age.fb.wtd, mean.india.income.fb.wtd, mean.india.male.fb.wtd, mean.india.married.fb.wtd, mean.india.lowercaste.fb.wtd, mean.india.density.fb.wtd),function(x) coef(summary(x))[2])
se.india.mt.wtd<-sapply(list(mean.india.edu.mt.wtd, mean.india.age.mt.wtd, mean.india.income.mt.wtd, mean.india.male.mt.wtd, mean.india.married.mt.wtd, mean.india.lowercaste.mt.wtd, mean.india.density.mt.wtd),function(x) coef(summary(x))[2])
se.india.qt.wtd<-sapply(list(mean.india.edu.qt.wtd, mean.india.age.qt.wtd, mean.india.income.qt.wtd, mean.india.male.qt.wtd, mean.india.married.qt.wtd, mean.india.lowercaste.qt.wtd, mean.india.density.qt.wtd),function(x) coef(summary(x))[2])

sd.india.nes<-c(sd.india.edu.nes, sd.india.age.nes, sd.india.income.nes, sd.india.male.nes, sd.india.married.nes, sd.india.lowercaste.nes,NA)

diff.india.fb<-abs(mean.india.fb-mean.india.nes)
diff.india.mt<-abs(mean.india.mt-mean.india.nes)
diff.india.qt<-abs(mean.india.qt-mean.india.nes)
which.closest<-apply(cbind(diff.india.fb,diff.india.mt,diff.india.qt),1, which.min)
col.india.fb<-ifelse(which.closest==1,'red','black')
col.india.mt<-ifelse(which.closest==2,'red','black')
col.india.qt<-ifelse(which.closest==3,'red','black')
col.india.fb[is.na(col.india.fb)]<-'black'
col.india.mt[is.na(col.india.mt)]<-'black'
col.india.qt[is.na(col.india.qt)]<-'black'

std.diff.india.fb<-(mean.india.fb-mean.india.nes)/sd.india.nes
std.diff.india.mt<-(mean.india.mt-mean.india.nes)/sd.india.nes
std.diff.india.qt<-(mean.india.qt-mean.india.nes)/sd.india.nes
std.diff.india.fb.wtd<-(mean.india.fb.wtd-mean.india.nes)/sd.india.nes
std.diff.india.mt.wtd<-(mean.india.mt.wtd-mean.india.nes)/sd.india.nes
std.diff.india.qt.wtd<-(mean.india.qt.wtd-mean.india.nes)/sd.india.nes

# Standardized difference table (Appendix Table 9)

india.stddiff.demog.table<-round(data.frame(fb= std.diff.india.fb, fb.wtd= std.diff.india.fb.wtd, mt= std.diff.india.mt, mt.wtd= std.diff.india.mt.wtd, qt= std.diff.india.qt, qt.wtd= std.diff.india.qt.wtd),2)
india.stddiff.demog.table<-india.stddiff.demog.table[-1*nrow(india.stddiff.demog.table),] # Population density NA because no population estimate
rownames(india.stddiff.demog.table)<-c('Education','Age Range','Income','Male','Married','Lower Caste')
colnames(india.stddiff.demog.table)<-c('Facebook','Facebook\nWeighted','MTurk','MTurk\nWeighted','Qualtrics','Qualtrics\nWeighted')

india.stddiff.demog.table.note<-'NOTE: Entries are mean differences between the convenience and probability sample figures listed in Table \\ref{india.demog.table}, divided by the standard deviation in the probability sample.'

india.stddiff.demog.table.latex<-latex(india.stddiff.demog.table,file='india_stddiff_demog_table.tex', rowlabel = '', collabel.just=rep('c',ncol(india.stddiff.demog.table)), col.just=rep('c',ncol(india.stddiff.demog.table)), caption = 'Indian Demographics: Standardized Difference from Probability Samples', extracolsize='normalsize', insert.bottom= india.stddiff.demog.table.note, booktabs = F, ctable = T, where = "htp")

# Plot (Main Text Figure 2)

lwd<-1.5
adjust<-0.15

pdf(file='india_demog.pdf',width=10,height=7)
par(mfrow=c(1,2))

par(mar=c(3,6,2,0)+.01)
plot(mean.india.nes, 7:1+1.5*adjust, type='p',axes=F,frame.plot=T,xlab='',ylab='',pch=19,xlim=0:1,ylim=c(1-1.5*adjust,7+1.5*adjust),main='Unweighted',sub='')
Axis(side=1,at=seq(0,1,.25),labels= seq(0,1,.25))
Axis(side=2,at=c(8:0),labels=c('','Education','Age Range','Income','Male','Married','Lower Caste','Population\nDensity',''),las=1)
abline(h=1:6+.5,lwd=1,lty=3)
segments(mean.india.nes-qnorm(.975)*se.india.nes,7:1+1.5*adjust, mean.india.nes+qnorm(.975)*se.india.nes,7:1+1.5*adjust,lwd=lwd)
segments(mean.india.fb-qnorm(.975)*se.india.fb,7:1+0.5*adjust, mean.india.fb+qnorm(.975)*se.india.fb,7:1+0.5*adjust,lwd=lwd)
segments(mean.india.mt-qnorm(.975)*se.india.mt,7:1-0.5*adjust, mean.india.mt+qnorm(.975)*se.india.mt,7:1-0.5*adjust,lwd=lwd)
segments(mean.india.qt-qnorm(.975)*se.india.qt,7:1-1.5*adjust, mean.india.qt+qnorm(.975)*se.india.qt,7:1-1.5*adjust,lwd=lwd)
points(mean.india.fb, 7:1+0.5*adjust, pch=21, bg='white')
points(mean.india.mt, 7:1-0.5*adjust, pch=8)
points(mean.india.qt, 7:1-1.5*adjust, pch=25, bg='white')

par(mar=c(3,2,2,4)+.01)
plot(mean.india.nes, 7:1+1.5*adjust, type='p',axes=F,frame.plot=T,xlab='',ylab='',pch=19,xlim=0:1,ylim=c(1-1.5*adjust,7+1.5*adjust),main='Weighted',sub='')
Axis(side=1,at=seq(0,1,.25),labels= seq(0,1,.25))
abline(h=1:6+.5,lwd=1,lty=3)
segments(mean.india.nes-qnorm(.975)*se.india.nes,7:1+1.5*adjust, mean.india.nes+qnorm(.975)*se.india.nes,7:1+1.5*adjust,lwd=lwd)
segments(mean.india.fb.wtd-qnorm(.975)*se.india.fb.wtd,7:1+0.5*adjust, mean.india.fb.wtd+qnorm(.975)*se.india.fb.wtd,7:1+0.5*adjust,lwd=lwd)
segments(mean.india.mt.wtd-qnorm(.975)*se.india.mt.wtd,7:1-0.5*adjust, mean.india.mt.wtd+qnorm(.975)*se.india.mt.wtd,7:1-0.5*adjust,lwd=lwd)
segments(mean.india.qt.wtd-qnorm(.975)*se.india.qt.wtd,7:1-1.5*adjust, mean.india.qt.wtd+qnorm(.975)*se.india.qt.wtd,7:1-1.5*adjust,lwd=lwd)
points(mean.india.fb.wtd, 7:1+0.5*adjust, pch=21, bg='white')
points(mean.india.mt.wtd, 7:1-0.5*adjust, pch=8)
points(mean.india.qt.wtd, 7:1-1.5*adjust, pch=25, bg='white')
legend('topleft',pch=c(19,21,8,25),legend=c('Probability','Facebook','MTurk','Qualtrics'),title='Sample',bg='white')
dev.off()

#############################################
# Appendix Figure 2: US distribution by state
#############################################

uspop<-readHTMLTable('Resident Population Data (Text Version) - 2010 Census.html')
uspop<-uspop[[1]][,c('State or Region','2010')]
names(uspop)<-c('state','pop')
uspop<-uspop[!uspop$state=='Percent Change',]
uspop$state[which(!uspop$state=='Population')+1]<-uspop$state[which(!uspop$state=='Population')]
uspop<-uspop[!is.na(uspop$pop),]
uspop<-uspop[!uspop$state %in% c('United States','Northeast','Midwest','South','West','Puerto Rico'),]
uspop$pop<-as.numeric(gsub(',','',uspop$pop))
uspop$pct<-100*uspop$pop/sum(uspop$pop)
uspop<-uspop[order(uspop$state),]
abbrevs<-readHTMLTable('State Abbreviations.html')
abbrevs<-abbrevs[[1]][abbrevs[[1]][,2]!='Abbreviation:',]
names(abbrevs)<-c('state','abbrev')
uspop<-merge(uspop,abbrevs)

us1$state<-as.numeric(us1$Q27)
us1$state[us1$state==40]<-NA # Removing PR; likely not in sampling frame for ANES
us1$state[which(us1$state > 40)]<-us1$state[which(us1$state > 40)] - 1
us1$state<-factor(us1$state,levels=1:51,labels=uspop$state)

anes$state<-factor(anes$sample_state,levels=uspop$abbrev,labels=uspop$state)

# Barplot

us.state.vals<-rbind(Census=uspop$pct,100*prop.table(table(us1$sample[us1$nontarg],us1$state[us1$nontarg]),1))
colnames(us.state.vals)<-gsub('District','Dist.',colnames(us.state.vals))
pdf(file='us_states_oursurvey.pdf',width=11,height=7)
par(mar=c(9,4.2,0,0)+.1)
barplot(us.state.vals,beside=T,las=3,legend.text=T,ylab='Percent',main='',args.legend=list(x='topright',cex=1.5),cex.lab=1.5,cex.axis=1.5,cex.names=1.2)
dev.off()

################################################
# Appendix Figure 3: India distribution by state
################################################

capitalize<-function(x){
	sapply(lapply(strsplit(x,' '), function(z) paste(toupper(substr(z,1,1)), tolower(substr(z,2,nchar(z))),sep='')),paste,collapse=' ')
}

load('census_pop_districts.RData')
states<-districts[districts$Level=='STATE'& districts$TRU=='Total',c('Name','Total.Population.Person')]
states$Name[states$Name=='NCT OF DELHI']<-'DELHI'
states$Name<-capitalize(states$Name)
states<-states[order(states$Name),]
states<-states[c(1:32,34,33,35),] #Swap order of UTTARAKHAND and UTTAR PRADESH to match questionnaire
states$prop<-states$Total.Population.Person/sum(states$Total.Population.Person)

india1$state<-as.numeric(india1$Q24)
india1$state<-ifelse(india1$state==32,2, india1$state) # New state of Telangana carved out of Andhra Pradesh in 2014; remerging to compare to 2011 census population figures
india1$state<-ifelse(india1$state >= 33, india1$state-1, india1$state)
india1$state<-factor(india1$state,levels=1:35, labels= states$Name)

wvs$state<-gsub('IN: ','', wvs$V256)
wvs$state<-gsub('NCT of ','',wvs$state)
wvs$state[wvs$state=='Chattisgarh']<-'Chhattisgarh'
wvs$state[wvs$state=='Orrisa']<-'Odisha'
wvs$state<-factor(wvs$state,levels= states$Name)

# Barplot

state.vals<-100*rbind(Census=states$prop,prop.table(table(india1$sample,india1$state),1))
colnames(state.vals)<-gsub('Islands','Is.',colnames(state.vals))
pdf(file='india_states_oursurvey.pdf',width=11,height=7)
par(mar=c(11,4.2,0,0)+.1)
barplot(state.vals,beside=T,las=3,legend.text=T,ylab='Percent',main='',args.legend=list(x='topleft',cex=1.5),cex.lab=1.5,cex.axis=1.5,cex.names=1.2)
dev.off()

####################################################################
# Appendix Figure 4 and Appendix Tables 12-13: Distribution by state
####################################################################

options(scipen=999) # Turn off scientific notation

# Set seed for replicating bootstrap of Cramer's V and Phi sampling distributions

set.seed(959284)

# India Cramer's V stats

india.cramer.wvs<-cramersV(table(wvs$state),p=states$prop)
india.cramer<-tapply(india1$state,india1$sample,function(x) cramersV(table(x),p=states$prop))
india.cramer.wvs.boot<-sapply(1:1000,function(x) cramersV(table(sample(wvs$state,replace=T)),p=states$prop))
india.cramer.fb.boot<-sapply(1:1000,function(x) cramersV(table(sample(india1$state[india1$sample=='Facebook'],replace=T)),p=states$prop))
india.cramer.mt.boot<-sapply(1:1000,function(x) cramersV(table(sample(india1$state[india1$sample=='MTurk'],replace=T)),p=states$prop))
india.cramer.qt.boot<-sapply(1:1000,function(x) cramersV(table(sample(india1$state[india1$sample=='Qualtrics'],replace=T)),p=states$prop))
india.cramer.fbwvs.boot<-sapply(1:1000,function(x) cramersV(table(sample(india1$state[india1$sample=='Facebook'],replace=T)),p=states$prop) - cramersV(table(sample(wvs$state,replace=T)),p=states$prop))
india.cramer.mtfb.boot<-sapply(1:1000,function(x) cramersV(table(sample(india1$state[india1$sample=='MTurk'],replace=T)),p=states$prop) - cramersV(table(sample(india1$state[india1$sample=='Facebook'],replace=T)),p=states$prop))
india.cramer.mtqt.boot<-sapply(1:1000,function(x) cramersV(table(sample(india1$state[india1$sample=='MTurk'],replace=T)),p=states$prop) - cramersV(table(sample(india1$state[india1$sample=='Qualtrics'],replace=T)),p=states$prop))
india.cramer.qtfb.boot<-sapply(1:1000,function(x) cramersV(table(sample(india1$state[india1$sample=='Qualtrics'],replace=T)),p=states$prop) - cramersV(table(sample(india1$state[india1$sample=='Facebook'],replace=T)),p=states$prop))

# India Phi stats

phi.test<-function(x,p) sqrt(chisq.test(x,p=p)$statistic/sum(x))

india.phi.wvs<-phi.test(table(wvs$state),p=states$prop)
india.phi<-tapply(india1$state,india1$sample,function(x) phi.test(table(x),p=states$prop))
india.phi.wvs.boot<-sapply(1:1000,function(x) phi.test(table(sample(wvs$state,replace=T)),p=states$prop))
india.phi.fb.boot<-sapply(1:1000,function(x) phi.test(table(sample(india1$state[india1$sample=='Facebook'],replace=T)),p=states$prop))
india.phi.mt.boot<-sapply(1:1000,function(x) phi.test(table(sample(india1$state[india1$sample=='MTurk'],replace=T)),p=states$prop))
india.phi.qt.boot<-sapply(1:1000,function(x) phi.test(table(sample(india1$state[india1$sample=='Qualtrics'],replace=T)),p=states$prop))
india.phi.fbwvs.boot<-sapply(1:1000,function(x) phi.test(table(sample(india1$state[india1$sample=='Facebook'],replace=T)),p=states$prop) - phi.test(table(sample(wvs$state,replace=T)),p=states$prop))
india.phi.mtfb.boot<-sapply(1:1000,function(x) phi.test(table(sample(india1$state[india1$sample=='MTurk'],replace=T)),p=states$prop) - phi.test(table(sample(india1$state[india1$sample=='Facebook'],replace=T)),p=states$prop))
india.phi.mtqt.boot<-sapply(1:1000,function(x) phi.test(table(sample(india1$state[india1$sample=='MTurk'],replace=T)),p=states$prop) - phi.test(table(sample(india1$state[india1$sample=='Qualtrics'],replace=T)),p=states$prop))
india.phi.qtfb.boot<-sapply(1:1000,function(x) phi.test(table(sample(india1$state[india1$sample=='Qualtrics'],replace=T)),p=states$prop) - phi.test(table(sample(india1$state[india1$sample=='Facebook'],replace=T)),p=states$prop))

# India Cramer's V CIs

india.cramer.wvs.ci<-paste('(',paste(round(quantile(india.cramer.wvs.boot,c(.025,.975)),4),collapse=', '),')',sep='')
india.cramer.fb.ci<-paste('(',paste(round(quantile(india.cramer.fb.boot,c(.025,.975)),4),collapse=', '),')',sep='')
india.cramer.mt.ci<-paste('(',paste(round(quantile(india.cramer.mt.boot,c(.025,.975)),4),collapse=', '),')',sep='')
india.cramer.qt.ci<-paste('(',paste(round(quantile(india.cramer.qt.boot,c(.025,.975)),4),collapse=', '),')',sep='')
india.cramer.fbwvs.ci<-paste('(',paste(round(quantile(india.cramer.fbwvs.boot,c(.025,.975)),4),collapse=', '),')',sep='')
india.cramer.mtfb.ci<-paste('(',paste(round(quantile(india.cramer.mtfb.boot,c(.025,.975)),4),collapse=', '),')',sep='')
india.cramer.mtqt.ci<-paste('(',paste(round(quantile(india.cramer.mtqt.boot,c(.025,.975)),4),collapse=', '),')',sep='')
india.cramer.qtfb.ci<-paste('(',paste(round(quantile(india.cramer.qtfb.boot,c(.025,.975)),4),collapse=', '),')',sep='')

# India Phi CIs

india.phi.wvs.ci<-paste('(',paste(round(quantile(india.phi.wvs.boot,c(.025,.975)),2),collapse=', '),')',sep='')
india.phi.fb.ci<-paste('(',paste(round(quantile(india.phi.fb.boot,c(.025,.975)),2),collapse=', '),')',sep='')
india.phi.mt.ci<-paste('(',paste(round(quantile(india.phi.mt.boot,c(.025,.975)),2),collapse=', '),')',sep='')
india.phi.qt.ci<-paste('(',paste(round(quantile(india.phi.qt.boot,c(.025,.975)),2),collapse=', '),')',sep='')
india.phi.fbwvs.ci<-paste('(',paste(round(quantile(india.phi.fbwvs.boot,c(.025,.975)),2),collapse=', '),')',sep='')
india.phi.mtfb.ci<-paste('(',paste(round(quantile(india.phi.mtfb.boot,c(.025,.975)),2),collapse=', '),')',sep='')
india.phi.mtqt.ci<-paste('(',paste(round(quantile(india.phi.mtqt.boot,c(.025,.975)),2),collapse=', '),')',sep='')
india.phi.qtfb.ci<-paste('(',paste(round(quantile(india.phi.qtfb.boot,c(.025,.975)),2),collapse=', '),')',sep='')

# US Cramer's V stats

us.cramer.anes<-cramersV(table(anes$state),p=uspop$pct/100)
us.cramer<-tapply(us1$state[us1$nontarg],us1$sample[us1$nontarg],function(x) cramersV(table(x),p=uspop$pct/100))
us.cramer.anes.boot<-sapply(1:1000,function(x) cramersV(table(sample(anes$state,replace=T)),p=uspop$pct/100))
us.cramer.fb.boot<-sapply(1:1000,function(x) cramersV(table(sample(us1$state[us1$nontarg & us1$sample=='Facebook'],replace=T)),p=uspop$pct/100))
us.cramer.mt.boot<-sapply(1:1000,function(x) cramersV(table(sample(us1$state[us1$nontarg & us1$sample=='MTurk'],replace=T)),p=uspop$pct/100))
us.cramer.qt.boot<-sapply(1:1000,function(x) cramersV(table(sample(us1$state[us1$nontarg & us1$sample=='Qualtrics'],replace=T)),p=uspop$pct/100))
us.cramer.mtanes.boot<-sapply(1:1000,function(x) cramersV(table(sample(us1$state[us1$nontarg & us1$sample=='MTurk'],replace=T)),p=uspop$pct/100) - cramersV(table(sample(anes$state,replace=T)),p=uspop$pct/100))
us.cramer.mtfb.boot<-sapply(1:1000,function(x) cramersV(table(sample(us1$state[us1$nontarg & us1$sample=='MTurk'],replace=T)),p=uspop$pct/100) - cramersV(table(sample(us1$state[us1$nontarg & us1$sample=='Facebook'],replace=T)),p=uspop$pct/100))
us.cramer.mtqt.boot<-sapply(1:1000,function(x) cramersV(table(sample(us1$state[us1$nontarg & us1$sample=='MTurk'],replace=T)),p=uspop$pct/100) - cramersV(table(sample(us1$state[us1$nontarg & us1$sample=='Qualtrics'],replace=T)),p=uspop$pct/100))
us.cramer.qtfb.boot<-sapply(1:1000,function(x) cramersV(table(sample(us1$state[us1$nontarg & us1$sample=='Qualtrics'],replace=T)),p=uspop$pct/100) - cramersV(table(sample(us1$state[us1$nontarg & us1$sample=='Facebook'],replace=T)),p=uspop$pct/100))

# US Phi stats

us.phi.anes<-phi.test(table(anes$state),p=uspop$pct/100)
us.phi<-tapply(us1$state[us1$nontarg],us1$sample[us1$nontarg],function(x) phi.test(table(x),p=uspop$pct/100))
us.phi.anes.boot<-sapply(1:1000,function(x) phi.test(table(sample(anes$state,replace=T)),p=uspop$pct/100))
us.phi.fb.boot<-sapply(1:1000,function(x) phi.test(table(sample(us1$state[us1$nontarg & us1$sample=='Facebook'],replace=T)),p=uspop$pct/100))
us.phi.mt.boot<-sapply(1:1000,function(x) phi.test(table(sample(us1$state[us1$nontarg & us1$sample=='MTurk'],replace=T)),p=uspop$pct/100))
us.phi.qt.boot<-sapply(1:1000,function(x) phi.test(table(sample(us1$state[us1$nontarg & us1$sample=='Qualtrics'],replace=T)),p=uspop$pct/100))
us.phi.mtanes.boot<-sapply(1:1000,function(x) phi.test(table(sample(us1$state[us1$nontarg & us1$sample=='MTurk'],replace=T)),p=uspop$pct/100) - phi.test(table(sample(anes$state,replace=T)),p=uspop$pct/100))
us.phi.mtfb.boot<-sapply(1:1000,function(x) phi.test(table(sample(us1$state[us1$nontarg & us1$sample=='MTurk'],replace=T)),p=uspop$pct/100) - phi.test(table(sample(us1$state[us1$nontarg & us1$sample=='Facebook'],replace=T)),p=uspop$pct/100))
us.phi.mtqt.boot<-sapply(1:1000,function(x) phi.test(table(sample(us1$state[us1$nontarg & us1$sample=='MTurk'],replace=T)),p=uspop$pct/100) - phi.test(table(sample(us1$state[us1$nontarg & us1$sample=='Qualtrics'],replace=T)),p=uspop$pct/100))
us.phi.qtfb.boot<-sapply(1:1000,function(x) phi.test(table(sample(us1$state[us1$nontarg & us1$sample=='Qualtrics'],replace=T)),p=uspop$pct/100) - phi.test(table(sample(us1$state[us1$nontarg & us1$sample=='Facebook'],replace=T)),p=uspop$pct/100))

# US Cramer's V CIs

us.cramer.anes.ci<-paste('(',paste(round(quantile(us.cramer.anes.boot,c(.025,.975)),4),collapse=', '),')',sep='')
us.cramer.fb.ci<-paste('(',paste(round(quantile(us.cramer.fb.boot,c(.025,.975)),4),collapse=', '),')',sep='')
us.cramer.mt.ci<-paste('(',paste(round(quantile(us.cramer.mt.boot,c(.025,.975)),4),collapse=', '),')',sep='')
us.cramer.qt.ci<-paste('(',paste(round(quantile(us.cramer.qt.boot,c(.025,.975)),4),collapse=', '),')',sep='')
us.cramer.mtanes.ci<-paste('(',paste(round(quantile(us.cramer.mtanes.boot,c(.025,.975)),4),collapse=', '),')',sep='')
us.cramer.mtfb.ci<-paste('(',paste(round(quantile(us.cramer.mtfb.boot,c(.025,.975)),4),collapse=', '),')',sep='')
us.cramer.mtqt.ci<-paste('(',paste(round(quantile(us.cramer.mtqt.boot,c(.025,.975)),4),collapse=', '),')',sep='')
us.cramer.qtfb.ci<-paste('(',paste(round(quantile(us.cramer.qtfb.boot,c(.025,.975)),4),collapse=', '),')',sep='')

# US Phi CIs

us.phi.anes.ci<-paste('(',paste(round(quantile(us.phi.anes.boot,c(.025,.975)),2),collapse=', '),')',sep='')
us.phi.fb.ci<-paste('(',paste(round(quantile(us.phi.fb.boot,c(.025,.975)),2),collapse=', '),')',sep='')
us.phi.mt.ci<-paste('(',paste(round(quantile(us.phi.mt.boot,c(.025,.975)),2),collapse=', '),')',sep='')
us.phi.qt.ci<-paste('(',paste(round(quantile(us.phi.qt.boot,c(.025,.975)),2),collapse=', '),')',sep='')
us.phi.mtanes.ci<-paste('(',paste(round(quantile(us.phi.mtanes.boot,c(.025,.975)),2),collapse=', '),')',sep='')
us.phi.mtfb.ci<-paste('(',paste(round(quantile(us.phi.mtfb.boot,c(.025,.975)),2),collapse=', '),')',sep='')
us.phi.mtqt.ci<-paste('(',paste(round(quantile(us.phi.mtqt.boot,c(.025,.975)),2),collapse=', '),')',sep='')
us.phi.qtfb.ci<-paste('(',paste(round(quantile(us.phi.qtfb.boot,c(.025,.975)),2),collapse=', '),')',sep='')

# Combined tables (Appendix Tables 12-13)

states.cramer.table<-matrix(c(round(c(india.cramer.wvs, india.cramer, india.cramer['MTurk']-india.cramer['Facebook'], india.cramer['MTurk']-india.cramer['Qualtrics'], india.cramer['Qualtrics']-india.cramer['Facebook'], india.cramer['Facebook']-india.cramer.wvs),4), c(india.cramer.wvs.ci, india.cramer.fb.ci, india.cramer.mt.ci, india.cramer.qt.ci, india.cramer.mtfb.ci, india.cramer.mtqt.ci, india.cramer.qtfb.ci, india.cramer.fbwvs.ci), round(c(us.cramer.anes, us.cramer, us.cramer['MTurk']-us.cramer['Facebook'], us.cramer['MTurk']-us.cramer['Qualtrics'], us.cramer['Qualtrics']-us.cramer['Facebook'], us.cramer['MTurk']-us.cramer.anes),4),c(us.cramer.anes.ci, us.cramer.fb.ci, us.cramer.mt.ci, us.cramer.qt.ci, us.cramer.mtfb.ci, us.cramer.mtqt.ci, us.cramer.qtfb.ci, us.cramer.mtanes.ci)),nrow=4,byrow=T)
colnames(states.cramer.table)<-c('Probability\nSample','Facebook','MTurk','Qualtrics','MTurk--\nFacebook','MTurk--\ Qualtrics','Qualtrics--\nFacebook','Best Convenience--\nProbability')

states.phi.table<-matrix(c(round(c(india.phi.wvs, india.phi, india.phi['MTurk']-india.phi['Facebook'], india.phi['MTurk']-india.phi['Qualtrics'], india.phi['Qualtrics']-india.phi['Facebook'], india.phi['Facebook']-india.phi.wvs),2), c(india.phi.wvs.ci, india.phi.fb.ci, india.phi.mt.ci, india.phi.qt.ci, india.phi.mtfb.ci, india.phi.mtqt.ci, india.phi.qtfb.ci, india.phi.fbwvs.ci), round(c(us.phi.anes, us.phi, us.phi['MTurk']-us.phi['Facebook'], us.phi['MTurk']-us.phi['Qualtrics'], us.phi['Qualtrics']-us.phi['Facebook'], us.phi['MTurk']-us.phi.anes),2),c(us.phi.anes.ci, us.phi.fb.ci, us.phi.mt.ci, us.phi.qt.ci, us.phi.mtfb.ci, us.phi.mtqt.ci, us.phi.qtfb.ci, us.phi.mtanes.ci)),nrow=4,byrow=T)
colnames(states.phi.table)<-c('Probability\nSample','Facebook','MTurk','Qualtrics','MTurk--\nFacebook','MTurk--\ Qualtrics','Qualtrics--\nFacebook','Best Convenience--\nProbability')

states.cramer.table.note<-"NOTE: Entries are Cram\\'er's V statistics and their differences, with bootstrapped 99\\% confidence intervals (percentile method) in parentheses. Probability samples are the 2014 World Values Survey (India) and the 2012 American National Election Studies (United States)."

states.cramer.table.latex<-latex(states.cramer.table,file='states_cramer_table.tex', rowlabel = '', rowname=c('India','','U.S.',''), collabel.just=rep('c',ncol(states.cramer.table)), col.just=rep('c',ncol(states.cramer.table)), caption = 'State of Residence: Deviation from Population Proportions', extracolsize='small', insert.bottom= states.cramer.table.note, booktabs = F, ctable = T, where = "htp",landscape=T)

states.phi.table.note<-"NOTE: Entries are Phi statistics and their differences, with bootstrapped 95\\% confidence intervals (percentile method) in parentheses. Probability samples are the 2014 World Values Survey (India) and the 2012 American National Election Studies (United States)."

states.phi.table.latex<-latex(states.phi.table,file='states_phi_table.tex', rowlabel = '', rowname=c('India','','U.S.',''), collabel.just=rep('c',ncol(states.phi.table)), col.just=rep('c',ncol(states.phi.table)), caption = 'State of Residence: Deviation from Population Proportions', extracolsize='normalsize', insert.bottom= states.phi.table.note, booktabs = F, ctable = T, where = "htp",landscape=T)

options(scipen=0) # Scientific notation back to normal

# Dot-plot (Appendix Figure 4)

est.phi.rep<-c(us.phi.anes, india.phi.wvs)
ci.phi.rep<-sapply(list(us.phi.anes.boot, india.phi.wvs.boot), quantile, c(.025,.975))
est.phi.fb<-c(us.phi['Facebook'], india.phi['Facebook'])
ci.phi.fb<-sapply(list(us.phi.fb.boot, india.phi.fb.boot), quantile, c(.025,.975))
est.phi.mt<-c(us.phi['MTurk'], india.phi['MTurk'])
ci.phi.mt<-sapply(list(us.phi.mt.boot, india.phi.mt.boot), quantile, c(.025,.975))
est.phi.qt<-c(us.phi['Qualtrics'], india.phi['Qualtrics'])
ci.phi.qt<-sapply(list(us.phi.qt.boot, india.phi.qt.boot), quantile, c(.025,.975))


lwd<-1.5
adjust<-0.15
nvars<-length(est.phi.rep)

pdf(file='states_phi.pdf',width=5,height=4)
par(mar=c(5,4,2,1)+.01)
plot(est.phi.rep, nvars:1+1.5*adjust, type='p',axes=F,frame.plot=T,xlab='Phi Statistic',ylab='',pch=19,xlim=c(0,2.5),ylim=c(.5,2.5),main='',sub='')
Axis(side=1,at=seq(0,2.5,.5),labels= seq(0,2.5,.5))
Axis(side=2,at=c((nvars+1):0),labels=c('','United\nStates','India',''),las=1)
abline(h=1:(nvars-1)+.5,lwd=1,lty=3)
segments(ci.phi.rep[1,],nvars:1+1.5*adjust, ci.phi.rep[2,],nvars:1+1.5*adjust,lwd=lwd)
segments(ci.phi.fb[1,],nvars:1+0.5*adjust, ci.phi.fb[2,],nvars:1+0.5*adjust,lwd=lwd)
segments(ci.phi.mt[1,],nvars:1-0.5*adjust, ci.phi.mt[2,],nvars:1-0.5*adjust,lwd=lwd)
segments(ci.phi.qt[1,],nvars:1-1.5*adjust, ci.phi.qt[2,],nvars:1-1.5*adjust,lwd=lwd)
points(est.phi.fb, nvars:1+0.5*adjust, pch=21, bg='white')
points(est.phi.mt, nvars:1-0.5*adjust, pch=8)
points(est.phi.qt, nvars:1-1.5*adjust, pch=25, bg='white')
legend('topright',pch=c(19,21,8,25),legend=c('Probability','Facebook','MTurk','Qualtrics'),title='Sample',bg='white')
dev.off()

######################################################
# Results conveyed textually but not in tables/figures
######################################################

# Main text Research Design section: Age bias in untargeted U.S. Facebook/MTurk samples

# "After initial recruitment, we found that the age ranges in the U.S. Facebook and MTurk samples were highly restricted, but in different ways. In the Facebook sample, 90% of respondents were 55 or older." 

100*with(us1[us1$nontarg & us1$sample=='Facebook',],mean(Q3 >= 55))

# "The MTurk sample was biased in the opposition direction, with 80% of respondents being 40 or younger."

100*with(us1[us1$nontarg & us1$sample=='MTurk',],mean(Q3 <= 40))

# Main text Demographics section: Concentration of Tamil Nadu and Kerala residents in the India MTurk sample.

# "Likewise, Chennai, the capital of Tamil Nadu and the region's major base for call center operations, is only modestly overrepresented in our MTurk sample (9.4% of the Tamil Nadu respondents, versus 6.4% of the state population) but heavily overrepresented in the Facebook and Qualtrics samples (37.2% and 25.2%, respectively).

# Eliminate invalid PIN codes
india1$pin<-ifelse(!india1$Q25 %in% pincodes$pincode, NA, india1$Q25)

# Chennai as a percentage of Tamil Nadu population in the 2011 census:
100* sum(districts$Total.Population.Person[districts$Name=='Chennai'&districts$statename=='TAMIL NADU'&districts$TRU=='Total']) / sum(districts$Total.Population.Person[districts$statename=='TAMIL NADU'&districts$TRU=='Total'])

# Chennai residents as a percentage of Tamil Nadu respondents in each sample, based on PIN code
100*with(india1[india1$Q24==31,],tapply(pin, sample, function(x) mean(x %in% pincodes$pincode[pincodes$Districtname=='Chennai'])))

# "Our online survey could be taken on mobile phones as well as desktops, but only 4-7% of the MTurk and Qualtrics samples accessed the survey via mobile devices, versus 43% of the Facebook sample. 

# Easier to identify desktop operating systems
india1$desktop<-india1$OS %in% c('CrOS x86_64 7520.25.0', 'Intel Mac OS X 10_6_2', 'Linux i686', 'Linux i686 (x86_64', 'Linux x86_64', 'Macintosh', 'Ubuntu', 'Windows NT 10.0', 'Windows NT 5.1', 'Windows NT 5.2', 'Windows NT 6.0', 'Windows NT 6.1', 'Windows NT 6.2', 'Windows NT 6.3')
india1$desktop[india1$OS=='']<-NA

# Everyone not taking it on a desktop used a mobile operating system
100*(1-tapply(india1$desktop, india1$sample, mean, na.rm=T))

# Appendix Section 2: Attitudes Toward Risk 

# "To assess differences in attitudes toward risk, we included a version of the common “Asian Disease” question in psychology, using only the “lives saved” framing (Tversky and Kahneman, 1981). In both India and the United States, Facebook-recruited respondents were significantly more likely than MTurk respondents to favor the risky option for combatting this hypothetical disease (38.8% for Facebook in India, 34.2% for MTurk in India, two-tailed p = 0.032; 34.3% for Facebook in the U.S., 30.7% for MTurk in the U.S., two-tailed p = 0.030)."

t.test(india1$Q17[india1$sample=='Facebook']==2, india1$Q17[india1$sample=='MTurk']==2)
t.test(us1$Q18[us1$sample=='Facebook']==2, us1$Q18[us1$sample=='MTurk']==2)

# "Moreover, there was no significant difference vis-a-vis Qualtrics-recruited respondents (36.2% in India, two-tailed p = 0.222; 37.4% in the U.S., two-tailed p = 0.098), who were also paid individually."

t.test(india1$Q17[india1$sample=='Facebook']==2, india1$Q17[india1$sample=='Qualtrics']==2)
t.test(us1$Q18[us1$sample=='Facebook']==2, us1$Q18[us1$sample=='Qualtrics']==2)

